From 52a0fc82433badb540fff42443360f2d2d437bef Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sat, 25 Apr 2026 12:20:55 -0400
Subject: [PATCH 01/13] Strip 'style' and 'script' tags in HTML

---
 src/checks/page-size/page-size-html.ts        |  9 ++-
 src/helpers/html-to-markdown.ts               | 14 ++--
 .../checks/content-start-position.test.ts     | 24 +++----
 test/unit/helpers/html-to-markdown.test.ts    | 66 +++++++++++++++++++
 4 files changed, 90 insertions(+), 23 deletions(-)
 create mode 100644 test/unit/helpers/html-to-markdown.test.ts
diff --git a/src/checks/page-size/page-size-html.ts b/src/checks/page-size/page-size-html.ts
index 65ea027..5af39be 100644
--- a/src/checks/page-size/page-size-html.ts
+++ b/src/checks/page-size/page-size-html.ts
@@ -107,6 +107,9 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
   const convertedSizes = successful.map((r) => r.convertedCharacters).sort((a, b) => a - b);
   const median = convertedSizes[Math.floor(convertedSizes.length / 2)];
   const max = convertedSizes[convertedSizes.length - 1];
+  const htmlSizes = successful.map((r) => r.htmlCharacters).sort((a, b) => a - b);
+  const medianHtml = htmlSizes[Math.floor(htmlSizes.length / 2)];
+  const maxHtml = htmlSizes[htmlSizes.length - 1];
   const avgRatio = Math.round(
     successful.reduce((sum, r) => sum + r.conversionRatio, 0) / successful.length,
   );
@@ -124,11 +127,11 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
 
   let message: string;
   if (overallStatus === 'pass') {
-    message = `All ${successful.length} ${pageLabel} convert under ${formatSize(passThreshold)} chars (median ${formatSize(median)}, ${avgRatio}% boilerplate)${suffix}`;
+    message = `All ${successful.length} ${pageLabel} under ${formatSize(passThreshold)} chars (median ${formatSize(medianHtml)} HTML → ${formatSize(median)} markdown (${avgRatio}% boilerplate))${suffix}`;
   } else if (overallStatus === 'warn') {
-    message = `${warnBucket} of ${successful.length} ${pageLabel} convert to ${formatSize(passThreshold)}–${formatSize(failThreshold)} chars (max ${formatSize(max)}, ${avgRatio}% boilerplate)${suffix}`;
+    message = `${warnBucket} of ${successful.length} ${pageLabel} convert to ${formatSize(passThreshold)}–${formatSize(failThreshold)} chars (max ${formatSize(maxHtml)} HTML → ${formatSize(max)} markdown (${avgRatio}% boilerplate))${suffix}`;
   } else {
-    message = `${failBucket} of ${successful.length} ${pageLabel} convert to over ${formatSize(failThreshold)} chars (max ${formatSize(max)}, ${avgRatio}% boilerplate)${suffix}`;
+    message = `${failBucket} of ${successful.length} ${pageLabel} convert to over ${formatSize(failThreshold)} chars (max ${formatSize(maxHtml)} HTML → ${formatSize(max)} markdown (${avgRatio}% boilerplate))${suffix}`;
   }
 
   return {
diff --git a/src/helpers/html-to-markdown.ts b/src/helpers/html-to-markdown.ts
index 53d5ca9..6d8b523 100644
--- a/src/helpers/html-to-markdown.ts
+++ b/src/helpers/html-to-markdown.ts
@@ -1,15 +1,20 @@
+import { parse } from 'node-html-parser';
 import TurndownService from 'turndown';
 import { tables } from 'turndown-plugin-gfm';
 
 /**
- * Convert HTML to markdown using Turndown with default configuration.
- * Matches real agent behavior per the Agent-Friendly Documentation Spec:
- * no explicit <style>/<script> stripping, default options only.
+ * Convert HTML to markdown using Turndown.
+ * <script> and <style> elements (and their contents) are removed before
+ * conversion so that CSS and JavaScript source do not leak into the output.
  * The GFM tables plugin is enabled so HTML tables are preserved as markdown
  * tables rather than being flattened to plain text.
  */
 export function htmlToMarkdown(html: string): string {
+  const root = parse(html);
+  for (const el of root.querySelectorAll('script, style')) {
+    el.remove();
+  }
   const turndown = new TurndownService();
   turndown.use(tables);
-  return turndown.turndown(html);
+  return turndown.turndown(root.toString());
 }
diff --git a/test/unit/checks/content-start-position.test.ts b/test/unit/checks/content-start-position.test.ts
index c98f7fc..ba9e9da 100644
--- a/test/unit/checks/content-start-position.test.ts
+++ b/test/unit/checks/content-start-position.test.ts
@@ -270,12 +270,12 @@ describe('content-start-position', () => {
   // ── Status threshold: fail (>50%) ──
 
   it('fails when content starts past 50%', async () => {
-    // Massive CSS boilerplate (leaks through Turndown) before a tiny heading
-    const cssRules = Array.from(
-      { length: 200 },
-      (_, i) => `.class${i} { color: red; margin: ${i}px; }`,
-    ).join('\n');
-    const html = `<html><head><style>${cssRules}</style></head><body><h3>Tiny Content</h3></body></html>`;
+    // Massive nav boilerplate before a tiny content section
+    const navLinks = Array.from(
+      { length: 100 },
+      (_, i) => `<li><a href="/nav${i}">Navigation Link ${i}</a></li>`,
+    ).join('');
+    const html = `<html><body><nav><ul>${navLinks}</ul></nav><h3>Tiny Content</h3><p>A short paragraph of documentation.</p></body></html>`;
 
     server.use(
       http.get(
@@ -305,17 +305,17 @@ describe('content-start-position', () => {
       ),
     );
 
-    // Page 2: massive CSS boilerplate before content (fail)
-    const cssRules = Array.from(
-      { length: 200 },
-      (_, i) => `.c${i} { color: red; margin: ${i}px; }`,
-    ).join('\n');
+    // Page 2: massive nav boilerplate before content (fail)
+    const navLinks = Array.from(
+      { length: 100 },
+      (_, i) => `<li><a href="/nav${i}">Navigation Link ${i}</a></li>`,
+    ).join('');
     server.use(
       http.get(
         'http://test.local/docs/bad',
         () =>
           new HttpResponse(
-            `<html><head><style>${cssRules}</style></head><body><h3>Late Content</h3></body></html>`,
+            `<html><body><nav><ul>${navLinks}</ul></nav><h3>Late Content</h3><p>A short paragraph.</p></body></html>`,
             { status: 200, headers: { 'Content-Type': 'text/html' } },
           ),
       ),
diff --git a/test/unit/helpers/html-to-markdown.test.ts b/test/unit/helpers/html-to-markdown.test.ts
new file mode 100644
index 0000000..8489603
--- /dev/null
+++ b/test/unit/helpers/html-to-markdown.test.ts
@@ -0,0 +1,66 @@
+import { describe, it, expect } from 'vitest';
+import { htmlToMarkdown } from '../../../src/helpers/html-to-markdown.js';
+
+describe('htmlToMarkdown', () => {
+  it('converts basic HTML to markdown', () => {
+    const html = '<html><body><h1>Title</h1><p>Hello world.</p></body></html>';
+    const md = htmlToMarkdown(html);
+    expect(md).toContain('Title');
+    expect(md).toContain('Hello world.');
+  });
+
+  it('strips <script> elements and their contents', () => {
+    const html = `<html><body>
+      <script>const x = 42; console.log(x);</script>
+      <h1>Title</h1>
+      <script type="application/json">{"key": "value"}</script>
+      <p>Content.</p>
+    </body></html>`;
+    const md = htmlToMarkdown(html);
+    expect(md).toContain('Title');
+    expect(md).toContain('Content.');
+    expect(md).not.toContain('const x = 42');
+    expect(md).not.toContain('console.log');
+    expect(md).not.toContain('"key"');
+  });
+
+  it('strips <style> elements and their contents', () => {
+    const html = `<html><head>
+      <style>.nav { color: red; margin: 10px; font-size: 14px; }</style>
+    </head><body>
+      <h1>Title</h1>
+      <style>body { background: blue; }</style>
+      <p>Content.</p>
+    </body></html>`;
+    const md = htmlToMarkdown(html);
+    expect(md).toContain('Title');
+    expect(md).toContain('Content.');
+    expect(md).not.toContain('color: red');
+    expect(md).not.toContain('background: blue');
+    expect(md).not.toContain('.nav');
+  });
+
+  it('strips both <script> and <style> while preserving content', () => {
+    const css = Array.from({ length: 50 }, (_, i) => `.c${i} { color: red; }`).join('\n');
+    const js = 'function init() { document.getElementById("app").render(); }';
+    const html = `<html><head><style>${css}</style></head><body>
+      <script>${js}</script>
+      <h1>Documentation</h1>
+      <p>This is the real content.</p>
+    </body></html>`;
+    const md = htmlToMarkdown(html);
+    expect(md).toContain('Documentation');
+    expect(md).toContain('This is the real content.');
+    expect(md).not.toContain('color: red');
+    expect(md).not.toContain('document.getElementById');
+  });
+
+  it('preserves HTML tables as markdown tables', () => {
+    const html = `<table><tr><th>Name</th><th>Value</th></tr>
+      <tr><td>foo</td><td>bar</td></tr></table>`;
+    const md = htmlToMarkdown(html);
+    expect(md).toContain('Name');
+    expect(md).toContain('foo');
+    expect(md).toContain('|');
+  });
+});

From 9ca4499e0ab37aca4d4a0c58fef3aa4fd3f1095d Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sat, 25 Apr 2026 12:40:02 -0400
Subject: [PATCH 02/13] Rename 'llms-txt-freshness' to 'llms-txt-coverage'

---
 SCORING.md                                    |  6 +--
 docs/agent-score-calculation.md               |  8 ++--
 docs/checks/content-discoverability.md        |  2 +-
 docs/checks/index.md                          |  2 +-
 docs/checks/observability.md                  | 12 +++---
 docs/generate_llms_txt                        |  2 +-
 docs/improve-your-score.md                    |  4 +-
 docs/interaction-diagnostics.md               |  2 +-
 docs/public/llms.txt                          |  2 +-
 docs/run-locally.md                           |  2 +-
 scoring-reference.md                          |  8 ++--
 src/checks/index.ts                           |  2 +-
 ...-txt-freshness.ts => llms-txt-coverage.ts} | 16 +++----
 src/helpers/get-page-urls.ts                  |  2 +-
 src/scoring/coefficients.ts                   |  2 +-
 src/scoring/proportions.ts                    |  4 +-
 src/scoring/resolutions.ts                    |  2 +-
 src/scoring/tag-scores.ts                     |  2 +-
 src/scoring/weights.ts                        |  2 +-
 src/types.ts                                  |  2 +-
 test/integration/check-pipeline.test.ts       | 12 +++---
 ...ness.test.ts => llms-txt-coverage.test.ts} | 42 +++++++++----------
 test/unit/scoring/coefficients.test.ts        |  2 +-
 test/unit/scoring/proportions.test.ts         |  8 ++--
 test/unit/scoring/resolutions.test.ts         |  2 +-
 test/unit/scoring/score.test.ts               |  4 +-
 26 files changed, 77 insertions(+), 77 deletions(-)
 rename src/checks/observability/{llms-txt-freshness.ts => llms-txt-coverage.ts} (97%)
 rename test/unit/checks/{llms-txt-freshness.test.ts => llms-txt-coverage.test.ts} (96%)

diff --git a/SCORING.md b/SCORING.md
index 924d59c..9e57247 100644
--- a/SCORING.md
+++ b/SCORING.md
@@ -88,7 +88,7 @@ Whether agent-facing resources stay accurate over time.
 
 | Check                                                                              | Weight     | What it measures                                                                                           |
 | ---------------------------------------------------------------------------------- | ---------- | ---------------------------------------------------------------------------------------------------------- |
-| [llms-txt-freshness](https://agentdocsspec.com/spec/#llms-txt-freshness)           | Medium (4) | Whether your llms.txt reflects your current site. A stale index sends agents to outdated or missing pages. |
+| [llms-txt-coverage](https://agentdocsspec.com/spec/#llms-txt-coverage)             | Medium (4) | Whether your llms.txt reflects your current site. A stale index sends agents to outdated or missing pages. |
 | [markdown-content-parity](https://agentdocsspec.com/spec/#markdown-content-parity) | Medium (4) | Whether markdown and HTML versions of pages contain the same content.                                      |
 | [cache-header-hygiene](https://agentdocsspec.com/spec/#cache-header-hygiene)       | Low (2)    | Whether cache lifetimes allow content updates to reach agents in a reasonable timeframe.                   |
 
@@ -128,7 +128,7 @@ Not all warnings represent the same degree of degradation. A warning on `llms-tx
 
 | Coefficient | Meaning                                  | Checks                                                                                                                                                                                                                                                                                 |
 | ----------- | ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| **0.75**    | Content substantively intact             | `llms-txt-valid`, `content-negotiation`, `llms-txt-links-resolve`, `llms-txt-freshness`, `markdown-content-parity`                                                                                                                                                                     |
+| **0.75**    | Content substantively intact             | `llms-txt-valid`, `content-negotiation`, `llms-txt-links-resolve`, `llms-txt-coverage`, `markdown-content-parity`                                                                                                                                                                      |
 | **0.60**    | Partial coverage or platform-dependent   | `llms-txt-directive`, `redirect-behavior`                                                                                                                                                                                                                                              |
 | **0.50**    | Genuine functional degradation           | `llms-txt-exists`, `llms-txt-size`, `rendering-strategy`, `markdown-url-support`, `page-size-markdown`, `page-size-html`, `content-start-position`, `tabbed-content-serialization`, `section-header-quality`, `cache-header-hygiene`, `auth-gate-detection`, `auth-alternative-access` |
 | **0.25**    | Actively steering agents to a worse path | `llms-txt-links-markdown` (markdown exists but llms.txt links to HTML; agents don't discover .md variants on their own)                                                                                                                                                                |
@@ -237,7 +237,7 @@ If pages are SPA shells, measuring HTML quality is meaningless. This coefficient
 
 ### Index truncation coefficient
 
-**Affects**: `llms-txt-links-resolve`, `llms-txt-valid`, `llms-txt-freshness`, `llms-txt-links-markdown`
+**Affects**: `llms-txt-links-resolve`, `llms-txt-valid`, `llms-txt-coverage`, `llms-txt-links-markdown`
 
 If your llms.txt is truncated, agents only see part of the index. Measuring the quality of the invisible portion doesn't reflect agent experience.
 
diff --git a/docs/agent-score-calculation.md b/docs/agent-score-calculation.md
index bed91f0..640f4ab 100644
--- a/docs/agent-score-calculation.md
+++ b/docs/agent-score-calculation.md
@@ -74,7 +74,7 @@ Every check is assigned a weight tier based on its observed impact on agent work
 
 | Check                     | Weight     |
 | ------------------------- | ---------- |
-| `llms-txt-freshness`      | Medium (4) |
+| `llms-txt-coverage`       | Medium (4) |
 | `markdown-content-parity` | Medium (4) |
 | `cache-header-hygiene`    | Low (2)    |
 
@@ -123,7 +123,7 @@ These checks test a single site-wide resource and produce one pass, warn, or fai
 | `llms-txt-size`           | Whether the llms.txt fits within agent context limits  |
 | `llms-txt-links-resolve`  | Whether links in the llms.txt return 200               |
 | `llms-txt-links-markdown` | Whether llms.txt links point to markdown content       |
-| `llms-txt-freshness`      | Whether the llms.txt reflects the current site state   |
+| `llms-txt-coverage`       | Whether the llms.txt reflects the current site state   |
 
 Note that the llms.txt link checks (`llms-txt-links-resolve`, `llms-txt-links-markdown`) do test multiple URLs, but they test the links _within_ the llms.txt file rather than sampling pages from the site. Their result is a single pass/warn/fail based on the overall resolution or markdown rate.
 
@@ -133,7 +133,7 @@ A warning is not a binary "half credit." Different warnings represent different
 
 | Coefficient | Meaning                                  | Checks                                                                                                                                                                                                                                                                                 |
 | ----------- | ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| **0.75**    | Content substantively intact             | `llms-txt-valid`, `content-negotiation`, `llms-txt-links-resolve`, `llms-txt-freshness`, `markdown-content-parity`                                                                                                                                                                     |
+| **0.75**    | Content substantively intact             | `llms-txt-valid`, `content-negotiation`, `llms-txt-links-resolve`, `llms-txt-coverage`, `markdown-content-parity`                                                                                                                                                                      |
 | **0.60**    | Partial coverage or platform-dependent   | `llms-txt-directive`, `redirect-behavior`                                                                                                                                                                                                                                              |
 | **0.50**    | Genuine functional degradation           | `llms-txt-exists`, `llms-txt-size`, `rendering-strategy`, `markdown-url-support`, `page-size-markdown`, `page-size-html`, `content-start-position`, `tabbed-content-serialization`, `section-header-quality`, `cache-header-hygiene`, `auth-gate-detection`, `auth-alternative-access` |
 | **0.25**    | Actively steering agents to a worse path | `llms-txt-links-markdown` (markdown exists but llms.txt links to HTML)                                                                                                                                                                                                                 |
@@ -184,7 +184,7 @@ If pages are SPA shells, measuring HTML quality is meaningless. This coefficient
 
 ### Index truncation coefficient
 
-**Affects**: `llms-txt-links-resolve`, `llms-txt-valid`, `llms-txt-freshness`, `llms-txt-links-markdown`
+**Affects**: `llms-txt-links-resolve`, `llms-txt-valid`, `llms-txt-coverage`, `llms-txt-links-markdown`
 
 If your llms.txt is truncated, agents only see part of the index. Measuring the quality of the invisible portion doesn't reflect real agent experience.
 
diff --git a/docs/checks/content-discoverability.md b/docs/checks/content-discoverability.md
index 85534e9..b2d2fc8 100644
--- a/docs/checks/content-discoverability.md
+++ b/docs/checks/content-discoverability.md
@@ -43,7 +43,7 @@ If your `llms.txt` lives at a location not covered by these candidates, AFDocs w
 
 ### Canonical selection
 
-When more than one candidate returns a file (e.g. an apex `llms.txt` for the marketing site _and_ a `/docs/llms.txt` for the docs section), AFDocs picks one as **canonical**. The canonical file is the single source of truth for downstream checks: link sampling, size, validation, freshness, and link-resolution all operate on it alone. Other discovered files still appear in `details.discoveredFiles` for visibility, and `cache-header-hygiene` still verifies headers on every llms.txt found.
+When more than one candidate returns a file (e.g. an apex `llms.txt` for the marketing site _and_ a `/docs/llms.txt` for the docs section), AFDocs picks one as **canonical**. The canonical file is the single source of truth for downstream checks: link sampling, size, validation, coverage, and link-resolution all operate on it alone. Other discovered files still appear in `details.discoveredFiles` for visibility, and `cache-header-hygiene` still verifies headers on every llms.txt found.
 
 The selection rule is _most-specific-to-the-baseUrl wins_. AFDocs picks the file whose directory is the longest prefix of the URL you passed. For example:
 
diff --git a/docs/checks/index.md b/docs/checks/index.md
index dd620e6..7af8b4b 100644
--- a/docs/checks/index.md
+++ b/docs/checks/index.md
@@ -33,7 +33,7 @@ Some checks depend on others. If a dependency doesn't pass, the dependent check
 - `page-size-markdown` requires `markdown-url-support` or `content-negotiation`
 - `section-header-quality` requires `tabbed-content-serialization`
 - `markdown-code-fence-validity` requires `markdown-url-support` or `content-negotiation`
-- `llms-txt-freshness` requires `llms-txt-exists`
+- `llms-txt-coverage` requires `llms-txt-exists`
 - `markdown-content-parity` requires `markdown-url-support` or `content-negotiation`
 - `auth-alternative-access` requires `auth-gate-detection` (warn or fail)
 
diff --git a/docs/checks/observability.md b/docs/checks/observability.md
index cdd513f..5784dde 100644
--- a/docs/checks/observability.md
+++ b/docs/checks/observability.md
@@ -2,15 +2,15 @@
 
 Whether agent-facing resources stay accurate over time. Getting `llms.txt` and markdown support working is the hard part; keeping them working is a different problem. These checks catch the silent failures: a stale index, drifting content between formats, and cache headers that delay updates.
 
-## llms-txt-freshness
+## llms-txt-coverage
 
 Whether your `llms.txt` reflects the current state of your documentation site.
 
-|                |                                                                          |
-| -------------- | ------------------------------------------------------------------------ |
-| **Weight**     | Medium (4)                                                               |
-| **Depends on** | `llms-txt-exists`                                                        |
-| **Spec**       | [llms-txt-freshness](https://agentdocsspec.com/spec/#llms-txt-freshness) |
+|                |                                                                        |
+| -------------- | ---------------------------------------------------------------------- |
+| **Weight**     | Medium (4)                                                             |
+| **Depends on** | `llms-txt-exists`                                                      |
+| **Spec**       | [llms-txt-coverage](https://agentdocsspec.com/spec/#llms-txt-coverage) |
 
 ### Why it matters
 
diff --git a/docs/generate_llms_txt b/docs/generate_llms_txt
index 7295825..5a19db3 100755
--- a/docs/generate_llms_txt
+++ b/docs/generate_llms_txt
@@ -44,7 +44,7 @@ echo "- [Markdown Availability](${BASE_URL}/checks/markdown-availability.md): .m
 echo "- [Page Size](${BASE_URL}/checks/page-size.md): rendering strategy, HTML/markdown size, content start position" >> "$OUTPUT"
 echo "- [Content Structure](${BASE_URL}/checks/content-structure.md): tabbed content serialization, header quality, code fence validity" >> "$OUTPUT"
 echo "- [URL Stability](${BASE_URL}/checks/url-stability.md): HTTP status codes, redirect behavior" >> "$OUTPUT"
-echo "- [Observability](${BASE_URL}/checks/observability.md): llms.txt freshness, markdown content parity, cache headers" >> "$OUTPUT"
+echo "- [Observability](${BASE_URL}/checks/observability.md): llms.txt coverage, markdown content parity, cache headers" >> "$OUTPUT"
 echo "- [Authentication](${BASE_URL}/checks/authentication.md): auth gate detection, alternative access paths" >> "$OUTPUT"
 
 cat >> "$OUTPUT" << 'SECTION'
diff --git a/docs/improve-your-score.md b/docs/improve-your-score.md
index 85e706b..05d68cd 100644
--- a/docs/improve-your-score.md
+++ b/docs/improve-your-score.md
@@ -67,7 +67,7 @@ Not all fixes are equal. Here are the highest-impact changes, ordered by the sco
 
 If `llms-txt-exists` fails, create an `llms.txt` at your site root listing your documentation pages with markdown links. See the [llms.txt specification](https://llmstxt.org/) for the format.
 
-This also unblocks five dependent checks (`llms-txt-valid`, `llms-txt-size`, `llms-txt-links-resolve`, `llms-txt-links-markdown`, `llms-txt-freshness`) that are currently skipped.
+This also unblocks five dependent checks (`llms-txt-valid`, `llms-txt-size`, `llms-txt-links-resolve`, `llms-txt-links-markdown`, `llms-txt-coverage`) that are currently skipped.
 
 **Enable server-side rendering**
 
@@ -112,7 +112,7 @@ These are worth addressing but won't move the score as dramatically:
 - **Tabbed content** (`tabbed-content-serialization`): If tabbed UI components create oversized output, consider restructuring into separate pages or using query params to retrieve only specific tab versions.
 - **Code fence validity** (`markdown-code-fence-validity`): Fix unclosed code fences in your markdown sources.
 - **Redirect behavior** (`redirect-behavior`): Replace JavaScript and cross-host redirects with standard HTTP redirects.
-- **llms.txt freshness** (`llms-txt-freshness`): Generate llms.txt at build time to keep it in sync with your site.
+- **llms.txt coverage** (`llms-txt-coverage`): Generate llms.txt at build time to keep it in sync with your site.
 - **Content parity** (`markdown-content-parity`): Ensure markdown and HTML versions of pages contain the same content.
 - **llms.txt validity** (`llms-txt-valid`): Follow the [llmstxt.org](https://llmstxt.org/) structure.
 
diff --git a/docs/interaction-diagnostics.md b/docs/interaction-diagnostics.md
index 91ae2bc..10a6227 100644
--- a/docs/interaction-diagnostics.md
+++ b/docs/interaction-diagnostics.md
@@ -22,7 +22,7 @@ These diagnostics appear in the "Interaction Diagnostics" section of the `--form
 
 **What to do**: Split into a root llms.txt that links to section-level llms.txt files, each under 50,000 characters. The [llms-txt-size check](/checks/content-discoverability#llms-txt-size) details the thresholds.
 
-**Score impact**: The index truncation coefficient scales down `llms-txt-links-resolve`, `llms-txt-valid`, `llms-txt-freshness`, and `llms-txt-links-markdown` proportionally. A file that's twice the limit counts those checks at roughly half weight.
+**Score impact**: The index truncation coefficient scales down `llms-txt-links-resolve`, `llms-txt-valid`, `llms-txt-coverage`, and `llms-txt-links-markdown` proportionally. A file that's twice the limit counts those checks at roughly half weight.
 
 ## SPA shells invalidate HTML path
 
diff --git a/docs/public/llms.txt b/docs/public/llms.txt
index 5887f8a..181ad1c 100644
--- a/docs/public/llms.txt
+++ b/docs/public/llms.txt
@@ -24,7 +24,7 @@
 - [Page Size](https://afdocs.dev/checks/page-size.md): rendering strategy, HTML/markdown size, content start position
 - [Content Structure](https://afdocs.dev/checks/content-structure.md): tabbed content serialization, header quality, code fence validity
 - [URL Stability](https://afdocs.dev/checks/url-stability.md): HTTP status codes, redirect behavior
-- [Observability](https://afdocs.dev/checks/observability.md): llms.txt freshness, markdown content parity, cache headers
+- [Observability](https://afdocs.dev/checks/observability.md): llms.txt coverage, markdown content parity, cache headers
 - [Authentication](https://afdocs.dev/checks/authentication.md): auth gate detection, alternative access paths
 
 ## API Reference
diff --git a/docs/run-locally.md b/docs/run-locally.md
index 61883b6..5b331bf 100644
--- a/docs/run-locally.md
+++ b/docs/run-locally.md
@@ -85,7 +85,7 @@ Some checks may behave differently against a local server:
 
 ## Production URLs in local builds
 
-When you build your site locally, generated files like `llms.txt` and `sitemap.xml` typically contain your production domain. AFDocs sees URLs pointing to `https://docs.example.com` but you're testing `http://localhost:3000`, so origin comparisons fail and checks like `llms-txt-freshness` report 0% coverage.
+When you build your site locally, generated files like `llms.txt` and `sitemap.xml` typically contain your production domain. AFDocs sees URLs pointing to `https://docs.example.com` but you're testing `http://localhost:3000`, so origin comparisons fail and checks like `llms-txt-coverage` report 0% coverage.
 
 Use `--canonical-origin` to tell AFDocs which production domain to rewrite:
 
diff --git a/scoring-reference.md b/scoring-reference.md
index 3e26294..1618daa 100644
--- a/scoring-reference.md
+++ b/scoring-reference.md
@@ -61,7 +61,7 @@ and the empirical evidence sections in each check definition.
 | `content-start-position`       | Medium   | 4      | Boilerplate preamble on HTML path wastes truncation budget.                                                                                           |
 | `tabbed-content-serialization` | Medium   | 4      | Tabbed content can be catastrophic but only affects pages that use it.                                                                                |
 | `markdown-code-fence-validity` | Medium   | 4      | Unclosed fences corrupt all content after the break point.                                                                                            |
-| `llms-txt-freshness`           | Medium   | 4      | Stale index is a slow failure mode; broken links catch the acute version.                                                                             |
+| `llms-txt-coverage`            | Medium   | 4      | Stale index is a slow failure mode; broken links catch the acute version.                                                                             |
 | `markdown-content-parity`      | Medium   | 4      | Content drift between markdown and HTML leaves agents with outdated info.                                                                             |
 | `auth-alternative-access`      | Medium   | 4      | Partial mitigation for auth-gated sites.                                                                                                              |
 | `redirect-behavior`            | Medium   | 4      | Cross-host redirects are a known friction point for some agents.                                                                                      |
@@ -108,7 +108,7 @@ Each check has a specific warn coefficient rather than a uniform default.
 | `llms-txt-valid`                                             | 0.75       | Non-standard structure, but links are parseable. Missing a blockquote doesn't prevent navigation.                                                                                            |
 | `content-negotiation`                                        | 0.75       | Agent gets the markdown content; wrong Content-Type may prevent optimizations but the content itself is correct.                                                                             |
 | `llms-txt-links-resolve`                                     | 0.75       | >90% of links work. A few broken links is a maintenance issue, not a structural one.                                                                                                         |
-| `llms-txt-freshness`                                         | 0.75       | 80-95% of pages covered. Most of the site is represented in the index.                                                                                                                       |
+| `llms-txt-coverage`                                          | 0.75       | 80-95% of pages covered. Most of the site is represented in the index.                                                                                                                       |
 | `markdown-content-parity`                                    | 0.75       | Minor formatting differences, not substantive content drift.                                                                                                                                 |
 | **0.60: Partial coverage or platform-dependent**             |            |                                                                                                                                                                                              |
 | `llms-txt-directive`                                         | 0.60       | Present on some pages but not others. Agents that land on covered pages benefit; others get no guidance.                                                                                     |
@@ -172,7 +172,7 @@ Single-resource checks (no proportional scoring needed):
 | `llms-txt-size`           | Per-file average (see note below)                             |
 | `llms-txt-links-resolve`  | Uses resolve rate directly from details (`resolveRate` field) |
 | `llms-txt-links-markdown` | Percentage-based status                                       |
-| `llms-txt-freshness`      | Coverage percentage                                           |
+| `llms-txt-coverage`       | Coverage percentage                                           |
 | `auth-alternative-access` | Binary: alternative path exists or doesn't                    |
 
 For `llms-txt-links-resolve`, the `resolveRate` field in details (a 0-1 float)
@@ -317,7 +317,7 @@ of the HTML path as a whole).
 ### Index Truncation Coefficient
 
 **Applies to**: `llms-txt-links-resolve`, `llms-txt-valid`,
-`llms-txt-freshness`, `llms-txt-links-markdown`
+`llms-txt-coverage`, `llms-txt-links-markdown`
 
 If `llms-txt-size` fails, agents only see a fraction of the index. The quality
 of the invisible portion doesn't affect agent experience.
diff --git a/src/checks/index.ts b/src/checks/index.ts
index 6f9ec55..342b0c3 100644
--- a/src/checks/index.ts
+++ b/src/checks/index.ts
@@ -28,7 +28,7 @@ import './url-stability/http-status-codes.js';
 import './url-stability/redirect-behavior.js';
 
 // Category 6: Observability
-import './observability/llms-txt-freshness.js';
+import './observability/llms-txt-coverage.js';
 import './observability/markdown-content-parity.js';
 import './observability/cache-header-hygiene.js';
 
diff --git a/src/checks/observability/llms-txt-freshness.ts b/src/checks/observability/llms-txt-coverage.ts
similarity index 97%
rename from src/checks/observability/llms-txt-freshness.ts
rename to src/checks/observability/llms-txt-coverage.ts
index 0f46574..30dddb8 100644
--- a/src/checks/observability/llms-txt-freshness.ts
+++ b/src/checks/observability/llms-txt-coverage.ts
@@ -202,13 +202,13 @@ const COVERAGE_PASS = 0.95;
 const COVERAGE_WARN = 0.8;
 
 /**
- * Maximum sitemap URLs to collect for freshness comparison.
+ * Maximum sitemap URLs to collect for coverage comparison.
  * Higher than the default MAX_SITEMAP_URLS (500) used for page sampling,
- * because freshness needs the full sitemap to produce meaningful coverage
+ * because the check needs the full sitemap to produce meaningful coverage
  * percentages. Enterprise docs sites (Stripe, MongoDB) can have thousands
  * of pages.
  */
-const MAX_FRESHNESS_SITEMAP_URLS = 50_000;
+const MAX_COVERAGE_SITEMAP_URLS = 50_000;
 
 /**
  * Try to fetch a docs-specific sitemap at {baseUrl}/sitemap.xml.
@@ -271,7 +271,7 @@ function scopeUrls(urls: string[], origin: string, baseUrlPath: string): string[
 }
 
 async function check(ctx: CheckContext): Promise<CheckResult> {
-  const id = 'llms-txt-freshness';
+  const id = 'llms-txt-coverage';
   const category = 'observability';
 
   // 1. Get llms.txt page URLs (with progressive disclosure walking)
@@ -291,7 +291,7 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
   const effectiveOrigin = ctx.effectiveOrigin ?? ctx.origin;
   const sitemapWarnings: string[] = [];
   let sitemapUrls = await getUrlsFromSitemap(ctx, sitemapWarnings, {
-    maxUrls: MAX_FRESHNESS_SITEMAP_URLS,
+    maxUrls: MAX_COVERAGE_SITEMAP_URLS,
     originOverride: effectiveOrigin,
     skipRefinement: true,
   });
@@ -317,7 +317,7 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
       category,
       status: 'skip',
       message:
-        'No sitemap found; cannot assess llms.txt freshness without a sitemap as ground truth',
+        'No sitemap found; cannot assess llms.txt coverage without a sitemap as ground truth',
       details: { sitemapWarnings },
     };
   }
@@ -478,9 +478,9 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
 }
 
 registerCheck({
-  id: 'llms-txt-freshness',
+  id: 'llms-txt-coverage',
   category: 'observability',
-  description: 'Whether llms.txt reflects the current state of the site',
+  description: 'How much of the site is represented in llms.txt',
   dependsOn: ['llms-txt-exists'],
   run: check,
 });
diff --git a/src/helpers/get-page-urls.ts b/src/helpers/get-page-urls.ts
index 09d5737..ba74302 100644
--- a/src/helpers/get-page-urls.ts
+++ b/src/helpers/get-page-urls.ts
@@ -625,7 +625,7 @@ export interface SitemapOptions {
   maxUrls?: number;
   originOverride?: string;
   pathFilterBase?: string;
-  /** Skip URL-level locale/version refinement. Use when the caller needs raw URLs (e.g. freshness coverage). */
+  /** Skip URL-level locale/version refinement. Use when the caller needs raw URLs (e.g. coverage check). */
   skipRefinement?: boolean;
 }
 
diff --git a/src/scoring/coefficients.ts b/src/scoring/coefficients.ts
index f7c6889..6ed4a9c 100644
--- a/src/scoring/coefficients.ts
+++ b/src/scoring/coefficients.ts
@@ -88,7 +88,7 @@ function getHtmlPathCoefficient(results: Map<string, CheckResult>): number {
 const INDEX_TRUNCATION_CHECKS = new Set([
   'llms-txt-links-resolve',
   'llms-txt-valid',
-  'llms-txt-freshness',
+  'llms-txt-coverage',
   'llms-txt-links-markdown',
 ]);
 
diff --git a/src/scoring/proportions.ts b/src/scoring/proportions.ts
index 0a69afb..5dd4d17 100644
--- a/src/scoring/proportions.ts
+++ b/src/scoring/proportions.ts
@@ -91,7 +91,7 @@ const PROPORTION_EXTRACTORS: Record<string, ProportionExtractor> = {
   'llms-txt-links-markdown': llmsTxtLinksMarkdownExtractor,
 
   // --- Percentage-based single-value checks ---
-  'llms-txt-freshness': llmsTxtFreshnessExtractor,
+  'llms-txt-coverage': llmsTxtCoverageExtractor,
 };
 
 // ---------------------------------------------------------------------------
@@ -450,7 +450,7 @@ function llmsTxtLinksMarkdownExtractor(result: CheckResult): ProportionResult |
   };
 }
 
-function llmsTxtFreshnessExtractor(result: CheckResult): ProportionResult | undefined {
+function llmsTxtCoverageExtractor(result: CheckResult): ProportionResult | undefined {
   const d = result.details;
   if (!d) return undefined;
 
diff --git a/src/scoring/resolutions.ts b/src/scoring/resolutions.ts
index d1b2a98..254ce75 100644
--- a/src/scoring/resolutions.ts
+++ b/src/scoring/resolutions.ts
@@ -272,7 +272,7 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
     },
   },
 
-  'llms-txt-freshness': {
+  'llms-txt-coverage': {
     warn: (d) => {
       const missing = (d.missingCount as number) ?? 0;
       return (
diff --git a/src/scoring/tag-scores.ts b/src/scoring/tag-scores.ts
index 86cd71a..f2601c6 100644
--- a/src/scoring/tag-scores.ts
+++ b/src/scoring/tag-scores.ts
@@ -119,7 +119,7 @@ const SINGLE_RESOURCE_CHECKS = new Set([
   'llms-txt-size',
   'llms-txt-links-resolve',
   'llms-txt-links-markdown',
-  'llms-txt-freshness',
+  'llms-txt-coverage',
 ]);
 
 /**
diff --git a/src/scoring/weights.ts b/src/scoring/weights.ts
index c139262..4e61bed 100644
--- a/src/scoring/weights.ts
+++ b/src/scoring/weights.ts
@@ -45,7 +45,7 @@ export const CHECK_WEIGHTS: Record<string, CheckWeight> = {
   'content-start-position': w('medium', 0.5),
   'tabbed-content-serialization': w('medium', 0.5),
   'markdown-code-fence-validity': w('medium'),
-  'llms-txt-freshness': w('medium', 0.75),
+  'llms-txt-coverage': w('medium', 0.75),
   'markdown-content-parity': w('medium', 0.75),
   'auth-alternative-access': w('medium', 0.5),
   'redirect-behavior': w('medium', 0.6),
diff --git a/src/types.ts b/src/types.ts
index ed03263..1d77fb5 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -35,7 +35,7 @@ export interface CheckContext {
   /**
    * The actual origin where content lives, when the baseUrl origin redirects
    * cross-host. Set by llms-txt-exists when it detects a cross-host redirect.
-   * Checks that need ground-truth data (e.g. sitemap for freshness) should
+   * Checks that need ground-truth data (e.g. sitemap for coverage) should
    * use this over `origin`; checks that test agent experience should use `origin`.
    */
   effectiveOrigin?: string;
diff --git a/test/integration/check-pipeline.test.ts b/test/integration/check-pipeline.test.ts
index 12bd04c..7beb6db 100644
--- a/test/integration/check-pipeline.test.ts
+++ b/test/integration/check-pipeline.test.ts
@@ -1182,7 +1182,7 @@ describe('check pipeline: canonical llms.txt selection', () => {
 });
 
 describe('check pipeline: effectiveOrigin propagation', () => {
-  it('llms-txt-exists sets effectiveOrigin which llms-txt-freshness uses', async () => {
+  it('llms-txt-exists sets effectiveOrigin which llms-txt-coverage uses', async () => {
     // llms.txt redirects cross-host; sitemap lives at the redirected host
     const redirectedHost = 'pipe-effective-docs.local';
     const llmsContent = `# Docs\n## Links\n- [Guide](http://${redirectedHost}/docs/guide): Guide\n`;
@@ -1224,18 +1224,18 @@ describe('check pipeline: effectiveOrigin propagation', () => {
     );
 
     const report = await runChecks('http://pipe-effective.local', {
-      checkIds: ['llms-txt-exists', 'llms-txt-freshness'],
+      checkIds: ['llms-txt-exists', 'llms-txt-coverage'],
       requestDelay: 0,
     });
 
     const existsResult = report.results.find((r) => r.id === 'llms-txt-exists')!;
-    const freshnessResult = report.results.find((r) => r.id === 'llms-txt-freshness')!;
+    const coverageResult = report.results.find((r) => r.id === 'llms-txt-coverage')!;
 
     // Cross-host redirect produces 'warn' (agents may not follow it)
     expect(existsResult.status).toBe('warn');
-    // Freshness should not skip — it should use the effectiveOrigin to find the sitemap
+    // Coverage should not skip — it should use the effectiveOrigin to find the sitemap
     // at the redirected host and match URLs there
-    expect(freshnessResult.status).not.toBe('skip');
-    expect(freshnessResult.message).not.toContain('No sitemap found');
+    expect(coverageResult.status).not.toBe('skip');
+    expect(coverageResult.message).not.toContain('No sitemap found');
   });
 });
diff --git a/test/unit/checks/llms-txt-freshness.test.ts b/test/unit/checks/llms-txt-coverage.test.ts
similarity index 96%
rename from test/unit/checks/llms-txt-freshness.test.ts
rename to test/unit/checks/llms-txt-coverage.test.ts
index b874b63..e7082d6 100644
--- a/test/unit/checks/llms-txt-freshness.test.ts
+++ b/test/unit/checks/llms-txt-coverage.test.ts
@@ -7,7 +7,7 @@ import type { DiscoveredFile } from '../../../src/types.js';
 import {
   hasLocaleCodeAt,
   filterToUnprefixedLocale,
-} from '../../../src/checks/observability/llms-txt-freshness.js';
+} from '../../../src/checks/observability/llms-txt-coverage.js';
 
 const server = setupServer();
 
@@ -16,7 +16,7 @@ beforeAll(() => {
   return () => server.close();
 });
 
-const check = getCheck('llms-txt-freshness');
+const check = getCheck('llms-txt-coverage');
 
 /**
  * Build a minimal llms.txt content string from an array of URLs.
@@ -61,9 +61,9 @@ function makeCtx(host: string, llmsTxtUrls: string[], basePath = '') {
   return ctx;
 }
 
-describe('llms-txt-freshness', () => {
+describe('llms-txt-coverage', () => {
   test('passes when llms.txt fully covers sitemap', async () => {
-    const host = 'fresh-pass.local';
+    const host = 'cov-pass.local';
     const pages = [
       `http://${host}/docs/getting-started`,
       `http://${host}/docs/api-reference`,
@@ -94,7 +94,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('passes when llms.txt uses .md URLs matching sitemap HTML URLs', async () => {
-    const host = 'fresh-md.local';
+    const host = 'cov-md.local';
     const llmsUrls = [
       `http://${host}/docs/getting-started.md`,
       `http://${host}/docs/api-reference.md`,
@@ -127,7 +127,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('passes with trailing slash differences', async () => {
-    const host = 'fresh-slash.local';
+    const host = 'cov-slash.local';
     const llmsUrls = [`http://${host}/docs/guide`];
     const sitemapUrls = [`http://${host}/docs/guide/`];
 
@@ -154,7 +154,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('warns when coverage is between 80% and 95%', async () => {
-    const host = 'fresh-warn.local';
+    const host = 'cov-warn.local';
     // llms.txt has 9 of 10 pages (90% coverage)
     const allPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
     const llmsPages = allPages.slice(0, 9);
@@ -183,7 +183,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('fails when coverage is below 80%', async () => {
-    const host = 'fresh-fail.local';
+    const host = 'cov-fail.local';
     // llms.txt has 5 of 10 pages (50% coverage)
     const allPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
     const llmsPages = allPages.slice(0, 5);
@@ -212,7 +212,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('reports unmatched llms.txt links not in sitemap', async () => {
-    const host = 'fresh-unmatched.local';
+    const host = 'cov-unmatched.local';
     const sitemapPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
     // llms.txt has all sitemap pages plus 3 extras not in sitemap
     const llmsPages = [
@@ -247,7 +247,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('unmatched links do not affect overall status', async () => {
-    const host = 'fresh-unmatched-pass.local';
+    const host = 'cov-unmatched-pass.local';
     // Coverage is fine (100%) but many unmatched llms.txt links
     const sitemapPages = Array.from({ length: 5 }, (_, i) => `http://${host}/docs/page-${i}`);
     const llmsPages = [
@@ -283,7 +283,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('skips when no sitemap is available', async () => {
-    const host = 'fresh-no-sitemap.local';
+    const host = 'cov-no-sitemap.local';
     const ctx = makeCtx(host, [`http://${host}/docs/page`], '/docs');
 
     server.use(
@@ -302,7 +302,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('skips when no page URLs in llms.txt', async () => {
-    const host = 'fresh-no-pages.local';
+    const host = 'cov-no-pages.local';
     const ctx = createContext(`http://${host}/docs`, { requestDelay: 0 });
     ctx.previousResults.set('llms-txt-exists', {
       id: 'llms-txt-exists',
@@ -327,7 +327,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('scopes sitemap URLs to baseUrl path prefix', async () => {
-    const host = 'fresh-scope.local';
+    const host = 'cov-scope.local';
     const docPages = [`http://${host}/docs/guide`, `http://${host}/docs/api`];
     const allSitemapPages = [
       ...docPages,
@@ -361,7 +361,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('excludes blog/changelog/pricing paths from sitemap comparison', async () => {
-    const host = 'fresh-exclude.local';
+    const host = 'cov-exclude.local';
     const docPages = [`http://${host}/guide`];
     const sitemapPages = [
       `http://${host}/guide`,
@@ -397,7 +397,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('handles index.md normalization', async () => {
-    const host = 'fresh-index.local';
+    const host = 'cov-index.local';
     const llmsUrls = [`http://${host}/docs/guide/index.md`];
     const sitemapUrls = [`http://${host}/docs/guide/`];
 
@@ -424,7 +424,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('skips when sitemap has no URLs under docs path prefix', async () => {
-    const host = 'fresh-no-scope.local';
+    const host = 'cov-no-scope.local';
     const ctx = makeCtx(host, [`http://${host}/docs/page`], '/docs');
     const sitemapPages = [`http://${host}/marketing/page1`, `http://${host}/marketing/page2`];
 
@@ -450,7 +450,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('does not count cross-origin llms.txt URLs as unmatched', async () => {
-    const host = 'fresh-cross.local';
+    const host = 'cov-cross.local';
     const sitemapPages = [`http://${host}/docs/page`];
     // llms.txt links to a page on a different host — should not be flagged
     const llmsPages = [`http://${host}/docs/page`, `http://other-host.local/docs/external`];
@@ -478,7 +478,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('falls back to docs-specific sitemap when main sitemap has no docs URLs', async () => {
-    const host = 'fresh-docs-sitemap.local';
+    const host = 'cov-docs-sitemap.local';
     const docPages = [`http://${host}/docs/guide`, `http://${host}/docs/api`];
     const marketingPages = [`http://${host}/about`, `http://${host}/pricing`];
 
@@ -517,7 +517,7 @@ describe('llms-txt-freshness', () => {
   });
 
   test('follows docs-specific sitemap index one level deep', async () => {
-    const host = 'fresh-docs-index.local';
+    const host = 'cov-docs-index.local';
     const docPages = [
       `http://${host}/docs/guide`,
       `http://${host}/docs/api`,
@@ -557,7 +557,7 @@ describe('llms-txt-freshness', () => {
     expect(result.status).toBe('pass');
     expect(result.details?.sitemapDocPages).toBe(3);
     // getUrlsFromSitemap now discovers the docs sitemap via subpath fallback,
-    // so the freshness check's own fetchDocsSitemap fallback doesn't fire.
+    // so the coverage check's own fetchDocsSitemap fallback doesn't fire.
     expect(result.details?.sitemapSource).toBe('robots.txt/sitemap.xml');
   });
 
@@ -599,7 +599,7 @@ describe('llms-txt-freshness', () => {
 
     const result = await check.run(ctx);
     expect(result.status).toBe('pass');
-    // Locale filtering now happens inside getUrlsFromSitemap, so the freshness
+    // Locale filtering now happens inside getUrlsFromSitemap, so the coverage
     // check receives only English URLs and its own locale detection is a no-op.
     expect(result.details?.sitemapDocPages).toBe(3);
   });
diff --git a/test/unit/scoring/coefficients.test.ts b/test/unit/scoring/coefficients.test.ts
index 8ab3bb7..2364fc0 100644
--- a/test/unit/scoring/coefficients.test.ts
+++ b/test/unit/scoring/coefficients.test.ts
@@ -122,7 +122,7 @@ describe('coefficients', () => {
     const affectedChecks = [
       'llms-txt-links-resolve',
       'llms-txt-valid',
-      'llms-txt-freshness',
+      'llms-txt-coverage',
       'llms-txt-links-markdown',
     ];
 
diff --git a/test/unit/scoring/proportions.test.ts b/test/unit/scoring/proportions.test.ts
index e1ea26a..061a70d 100644
--- a/test/unit/scoring/proportions.test.ts
+++ b/test/unit/scoring/proportions.test.ts
@@ -353,9 +353,9 @@ describe('proportions', () => {
       expect(result!.proportion).toBe(0.3);
     });
 
-    it('llms-txt-freshness: uses coverageRate', () => {
+    it('llms-txt-coverage: uses coverageRate', () => {
       const result = getCheckProportion(
-        makeResult('llms-txt-freshness', 'warn', {
+        makeResult('llms-txt-coverage', 'warn', {
           coverageRate: 88,
         }),
         makeWeight(4, 0.75),
@@ -386,9 +386,9 @@ describe('proportions', () => {
       expect(result!.proportion).toBe(0.0);
     });
 
-    it('llms-txt-freshness: falls back when no coverageRate', () => {
+    it('llms-txt-coverage: falls back when no coverageRate', () => {
       const result = getCheckProportion(
-        makeResult('llms-txt-freshness', 'warn', {}),
+        makeResult('llms-txt-coverage', 'warn', {}),
         makeWeight(4, 0.75),
       );
       expect(result!.proportion).toBe(0.75);
diff --git a/test/unit/scoring/resolutions.test.ts b/test/unit/scoring/resolutions.test.ts
index f8177fd..f848ea6 100644
--- a/test/unit/scoring/resolutions.test.ts
+++ b/test/unit/scoring/resolutions.test.ts
@@ -82,7 +82,7 @@ describe('resolutions', () => {
       'markdown-code-fence-validity',
       'http-status-codes',
       'redirect-behavior',
-      'llms-txt-freshness',
+      'llms-txt-coverage',
       'markdown-content-parity',
       'cache-header-hygiene',
       'auth-gate-detection',
diff --git a/test/unit/scoring/score.test.ts b/test/unit/scoring/score.test.ts
index af4ea6a..3f83740 100644
--- a/test/unit/scoring/score.test.ts
+++ b/test/unit/scoring/score.test.ts
@@ -65,7 +65,7 @@ describe('computeScore', () => {
       makeResult('markdown-code-fence-validity', 'content-structure', 'pass'),
       makeResult('http-status-codes', 'url-stability', 'pass'),
       makeResult('redirect-behavior', 'url-stability', 'pass'),
-      makeResult('llms-txt-freshness', 'observability', 'pass'),
+      makeResult('llms-txt-coverage', 'observability', 'pass'),
       makeResult('markdown-content-parity', 'observability', 'pass'),
       makeResult('cache-header-hygiene', 'observability', 'pass'),
       makeResult('auth-gate-detection', 'authentication', 'pass'),
@@ -385,7 +385,7 @@ describe('computeScore', () => {
         makeResult('redirect-behavior', 'url-stability', 'pass'),
 
         // Observability
-        makeResult('llms-txt-freshness', 'observability', 'pass'),
+        makeResult('llms-txt-coverage', 'observability', 'pass'),
         makeResult('cache-header-hygiene', 'observability', 'pass'),
 
         // No auth issues

From ce698fc923f328699235fad02291ea080f375a20 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sat, 25 Apr 2026 13:54:41 -0400
Subject: [PATCH 03/13] Omit subtrees from coverage check, support curation and
 exclusion configuration

---
 docs/checks/observability.md                  |  80 ++++-
 docs/reference/cli.md                         |  16 +
 docs/reference/config-file.md                 |  36 +-
 package-lock.json                             |  16 +-
 package.json                                  |   2 +
 src/checks/observability/llms-txt-coverage.ts | 125 ++++++-
 src/cli/commands/check.ts                     |  26 ++
 src/constants.ts                              |   6 +
 src/helpers/get-page-urls.ts                  |  35 +-
 src/scoring/resolutions.ts                    |  11 +-
 src/types.ts                                  |   6 +
 test/unit/checks/llms-txt-coverage.test.ts    | 324 +++++++++++++++++-
 12 files changed, 621 insertions(+), 62 deletions(-)

diff --git a/docs/checks/observability.md b/docs/checks/observability.md
index 5784dde..7c0a240 100644
--- a/docs/checks/observability.md
+++ b/docs/checks/observability.md
@@ -4,7 +4,7 @@ Whether agent-facing resources stay accurate over time. Getting `llms.txt` and m
 
 ## llms-txt-coverage
 
-Whether your `llms.txt` reflects the current state of your documentation site.
+How much of your site's documentation is represented in `llms.txt`.
 
 |                |                                                                        |
 | -------------- | ---------------------------------------------------------------------- |
@@ -14,21 +14,83 @@ Whether your `llms.txt` reflects the current state of your documentation site.
 
 ### Why it matters
 
-An `llms.txt` that was accurate at launch but never updated is a silent failure. New pages won't appear in the index, deleted pages send agents to 404s, and renamed pages produce redirect chains. Unlike `llms-txt-links-resolve` (which catches broken links), this check catches missing coverage: pages that exist on your site but aren't listed in `llms.txt`.
+Pages missing from `llms.txt` are effectively invisible to agents that rely on it for discovery. Unlike `llms-txt-links-resolve` (which catches broken links to pages that are listed), this check catches the opposite problem: pages that exist on your site but aren't listed at all. Not every gap is a problem; many sites intentionally curate their `llms.txt`. The check makes coverage visible so you can confirm it reflects your intent.
 
 ### Results
 
-Based on coverage of your site's documentation pages (excluding non-docs pages like blog posts, pricing, login):
+Based on coverage of your site's documentation pages, after excluding non-doc pages (see [built-in exclusions](#built-in-exclusions) below). Thresholds are configurable.
 
-| Result | Condition                                                       |
-| ------ | --------------------------------------------------------------- |
-| Pass   | `llms.txt` covers 95% or more of the site's documentation pages |
-| Warn   | 80-95% coverage (some live pages missing from the index)        |
-| Fail   | Under 80% coverage (missing large documentation sections)       |
+| Result | Condition                                                                |
+| ------ | ------------------------------------------------------------------------ |
+| Pass   | `llms.txt` covers >= pass threshold (default 95%) of documentation pages |
+| Warn   | Coverage between warn and pass thresholds (default 80-95%)               |
+| Fail   | Coverage below warn threshold (default < 80%)                            |
+
+### Configuring coverage
+
+The check supports three use cases through configurable thresholds and exclusion patterns:
+
+- **Full parity** (default): The site intends `llms.txt` to mirror the sitemap. Default thresholds (95/80) apply.
+- **Curated**: The site intentionally includes only a subset. Set thresholds to 0 (`--coverage-pass-threshold 0 --coverage-warn-threshold 0`) to make the check informational. It still reports coverage percentage and missing pages, but does not warn or fail.
+- **Hybrid**: Strict coverage with known exclusions. Use `--coverage-exclusions` to remove intentional gaps from the denominator; the check holds remaining pages to default or custom thresholds.
+
+**CLI flags:**
+
+- `--coverage-pass-threshold <n>` — Pass threshold (0-100, default 95)
+- `--coverage-warn-threshold <n>` — Warn threshold (0-100, default 80)
+- `--coverage-exclusions <patterns>` — Comma-separated glob patterns to exclude from the sitemap before calculating coverage (e.g. `"/docs/reference/**,/docs/changelog/**"`)
+
+These can also be set in `agent-docs.config.yml` under `options`:
+
+```yaml
+options:
+  coveragePassThreshold: 80
+  coverageWarnThreshold: 50
+  coverageExclusions:
+    - /docs/reference/**
+    - /docs/changelog/**
+    - '**/release-notes/**' # quote patterns starting with *
+```
 
 ### How to fix
 
-**If this check warns or fails**, regenerate `llms.txt` from your sitemap or build pipeline. The best long-term fix is generating `llms.txt` at build time, so every deployment automatically includes an up-to-date index. Run with `--verbose` to see which pages are missing.
+**If this check warns or fails**, regenerate `llms.txt` from your sitemap or build pipeline. The best long-term fix is generating `llms.txt` at build time, so every deployment automatically includes an up-to-date index. Run with `--verbose` to see which pages are missing. If the missing pages are intentionally excluded, use `--coverage-exclusions` or adjust thresholds.
+
+### Built-in exclusions
+
+Before calculating coverage, the check removes sitemap URLs whose paths match common non-documentation patterns. These pages appear in sitemaps but aren't meaningful to include in an `llms.txt` index. The excluded count is reported as `excludedNonDocPages` in the check details.
+
+The tool provides these built-in exclusions (matched both at the site root and relative to the base URL path):
+
+`/blog`, `/pricing`, `/about`, `/career`, `/careers`, `/job`, `/jobs`, `/contact`, `/legal`, `/privacy`, `/terms`, `/login`, `/signup`, `/sign-up`, `/sign-in`, `/register`, `/404`, `/500`
+
+For example, if your base URL is `https://example.com/docs`, both `/blog/post-1` and `/docs/blog/post-1` would be excluded.
+
+These are not configurable. If a built-in exclusion is removing pages you want counted, the page is likely at a path that conventionally indicates non-doc content. If you believe a pattern is wrong, please [open an issue](https://github.com/agent-ecosystem/afdocs/issues).
+
+Paths like `/changelog`, `/releases`, and `/security` are **not** excluded because many documentation sites intentionally include this content in their `llms.txt`. If you want to exclude them, use `--coverage-exclusions`.
+
+### Omitted subtrees
+
+When your `llms.txt` uses [progressive disclosure](https://agentdocsspec.com/spec/#progressive-disclosure-for-large-documentation-sets) (nested `llms.txt` files), the walker descends one level into linked `.txt` files. Any `.txt` files found at that depth (which the walker does not descend into) are treated as "omitted subtrees." Sitemap pages under those subtree prefixes are excluded from the coverage denominator rather than counted as missing.
+
+This means deeply nested `llms.txt` structures aren't penalized. The output distinguishes directly verified pages from omitted subtrees.
+
+**Why not walk recursively?** A recursive walk would fetch every nested `.txt` file before any checks run. For a site like Alchemy, that's ~86 aggregate files across three levels. For a multi-product site like Microsoft Learn, it could be hundreds. A safety cap (e.g. 200 files) would silently truncate results, producing incomplete coverage numbers with no indication they're partial. Keeping the walker at depth 1 makes the HTTP footprint predictable, makes the runs more performant, and makes the results reproducible.
+
+**Run per-product for deeper visibility.** Organizations with large multi-product sites typically run `afdocs` at the per-product level, which gives full coverage visibility into each section without the cost of walking the entire tree:
+
+```bash
+# Instead of walking the entire site's progressive disclosure tree:
+afdocs check https://example.com/docs
+
+# Run per-product for deeper coverage:
+afdocs check https://example.com/docs/chains/ethereum
+afdocs check https://example.com/docs/chains/solana
+afdocs check https://example.com/docs/sdk
+```
+
+Each per-product run picks up that section's `llms.txt` as canonical. For the sitemap, the tool scopes the root sitemap's URLs to the base path prefix. If no URLs match (common when the root sitemap doesn't cover the section), it falls back to looking for a section-level sitemap at `{basePath}/sitemap.xml`. This keeps runs fast and results meaningful.
 
 ---
 
diff --git a/docs/reference/cli.md b/docs/reference/cli.md
index c9a9190..987dbe8 100644
--- a/docs/reference/cli.md
+++ b/docs/reference/cli.md
@@ -198,6 +198,22 @@ These thresholds apply to `page-size-html`, `page-size-markdown`, and `tabbed-co
 
 The defaults (50K pass, 100K fail) reflect observed agent truncation limits. You generally don't need to change these unless you have specific knowledge of your users' agent platforms.
 
+### Coverage thresholds
+
+| Flag                               | Default | Description                                                   |
+| ---------------------------------- | ------- | ------------------------------------------------------------- |
+| `--coverage-pass-threshold <n>`    | `95`    | `llms-txt-coverage` pass threshold (percentage, 0-100)        |
+| `--coverage-warn-threshold <n>`    | `80`    | `llms-txt-coverage` warn threshold (percentage, 0-100)        |
+| `--coverage-exclusions <patterns>` |         | Comma-separated glob patterns to exclude from the denominator |
+
+These control the `llms-txt-coverage` check, which compares `llms.txt` page URLs against the sitemap. Set both thresholds to `0` to make the check informational: it still reports coverage percentage and missing pages, but doesn't warn or fail.
+
+Use exclusion patterns with glob syntax (`**` matches across path segments, `*` matches within one) to remove matching sitemap URLs from the denominator before calculating coverage. Exclude content like API reference pages or changelog archives that you intentionally omit from `llms.txt`:
+
+```bash
+afdocs check https://example.com --coverage-exclusions "/docs/reference/**,/docs/changelog/**"
+```
+
 ## Exit codes
 
 | Code | Meaning                     |
diff --git a/docs/reference/config-file.md b/docs/reference/config-file.md
index f6d2919..a8addf8 100644
--- a/docs/reference/config-file.md
+++ b/docs/reference/config-file.md
@@ -36,6 +36,13 @@ options:
   thresholds:
     pass: 50000
     fail: 100000
+  # Coverage check: thresholds and exclusions
+  # coveragePassThreshold: 95
+  # coverageWarnThreshold: 80
+  # coverageExclusions:
+  #   - /docs/reference/**
+  #   - /docs/changelog/**
+  #   - "**/release-notes/**"  # quote patterns starting with *
 
 # Optional: test specific pages instead of discovering via llms.txt/sitemap
 # pages:
@@ -71,19 +78,22 @@ skipChecks:
 
 Override default runner options. All fields are optional:
 
-| Field              | Default     | Description                                                                                 |
-| ------------------ | ----------- | ------------------------------------------------------------------------------------------- |
-| `maxLinksToTest`   | `50`        | Maximum number of pages to sample                                                           |
-| `samplingStrategy` | `random`    | `random`, `deterministic`, `curated`, or `none`                                             |
-| `maxConcurrency`   | `3`         | Maximum concurrent HTTP requests                                                            |
-| `requestDelay`     | `200`       | Delay between requests in milliseconds                                                      |
-| `requestTimeout`   | `30000`     | Timeout for individual HTTP requests in milliseconds                                        |
-| `preferredLocale`  | auto-detect | Preferred locale for URL discovery (e.g. `en`, `fr`, `ja`)                                  |
-| `preferredVersion` | auto-detect | Preferred version for URL discovery (e.g. `v3`, `2.x`)                                      |
-| `canonicalOrigin`  |             | The production domain your content links to                                                 |
-| `llmsTxtUrl`       |             | Explicit llms.txt URL to use as canonical (overrides the discovery heuristic; see CLI docs) |
-| `thresholds.pass`  | `50000`     | Page size pass threshold in characters                                                      |
-| `thresholds.fail`  | `100000`    | Page size fail threshold in characters                                                      |
+| Field                   | Default     | Description                                                                                              |
+| ----------------------- | ----------- | -------------------------------------------------------------------------------------------------------- |
+| `maxLinksToTest`        | `50`        | Maximum number of pages to sample                                                                        |
+| `samplingStrategy`      | `random`    | `random`, `deterministic`, `curated`, or `none`                                                          |
+| `maxConcurrency`        | `3`         | Maximum concurrent HTTP requests                                                                         |
+| `requestDelay`          | `200`       | Delay between requests in milliseconds                                                                   |
+| `requestTimeout`        | `30000`     | Timeout for individual HTTP requests in milliseconds                                                     |
+| `preferredLocale`       | auto-detect | Preferred locale for URL discovery (e.g. `en`, `fr`, `ja`)                                               |
+| `preferredVersion`      | auto-detect | Preferred version for URL discovery (e.g. `v3`, `2.x`)                                                   |
+| `canonicalOrigin`       |             | The production domain your content links to                                                              |
+| `llmsTxtUrl`            |             | Explicit llms.txt URL to use as canonical (overrides the discovery heuristic; see CLI docs)              |
+| `thresholds.pass`       | `50000`     | Page size pass threshold in characters                                                                   |
+| `thresholds.fail`       | `100000`    | Page size fail threshold in characters                                                                   |
+| `coveragePassThreshold` | `95`        | `llms-txt-coverage` pass threshold (percentage, 0-100)                                                   |
+| `coverageWarnThreshold` | `80`        | `llms-txt-coverage` warn threshold (percentage, 0-100)                                                   |
+| `coverageExclusions`    |             | Glob patterns to exclude from the sitemap before calculating coverage (quote patterns starting with `*`) |
 
 ### `pages` (optional)
 
diff --git a/package-lock.json b/package-lock.json
index 6d3142c..85f8644 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -12,6 +12,7 @@
         "chalk": "^5.4.1",
         "commander": "^13.1.0",
         "node-html-parser": "^7.1.0",
+        "picomatch": "^4.0.4",
         "turndown": "^7.2.2",
         "turndown-plugin-gfm": "^1.0.2",
         "yaml": "^2.7.0"
@@ -22,6 +23,7 @@
       "devDependencies": {
         "@eslint/js": "^10.0.1",
         "@types/node": "^22.13.4",
+        "@types/picomatch": "^4.0.3",
         "@types/turndown": "^5.0.6",
         "@vitest/coverage-v8": "^4.0.18",
         "eslint": "^10.0.1",
@@ -1291,6 +1293,13 @@
         "undici-types": "~6.21.0"
       }
     },
+    "node_modules/@types/picomatch": {
+      "version": "4.0.3",
+      "resolved": "https://registry.npmjs.org/@types/picomatch/-/picomatch-4.0.3.tgz",
+      "integrity": "sha512-iG0T6+nYJ9FAPmx9SsUlnwcq1ZVRuCXcVEvWnntoPlrOpwtSTKNDC9uVAxTsC3PUvJ+99n4RpAcNgBbHX3JSnQ==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/@types/statuses": {
       "version": "2.0.6",
       "resolved": "https://registry.npmjs.org/@types/statuses/-/statuses-2.0.6.tgz",
@@ -3426,10 +3435,9 @@
       "license": "ISC"
     },
     "node_modules/picomatch": {
-      "version": "4.0.3",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
-      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
-      "dev": true,
+      "version": "4.0.4",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
+      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
       "license": "MIT",
       "peer": true,
       "engines": {
diff --git a/package.json b/package.json
index 1100388..c6e5ba6 100644
--- a/package.json
+++ b/package.json
@@ -79,6 +79,7 @@
     "chalk": "^5.4.1",
     "commander": "^13.1.0",
     "node-html-parser": "^7.1.0",
+    "picomatch": "^4.0.4",
     "turndown": "^7.2.2",
     "turndown-plugin-gfm": "^1.0.2",
     "yaml": "^2.7.0"
@@ -86,6 +87,7 @@
   "devDependencies": {
     "@eslint/js": "^10.0.1",
     "@types/node": "^22.13.4",
+    "@types/picomatch": "^4.0.3",
     "@types/turndown": "^5.0.6",
     "@vitest/coverage-v8": "^4.0.18",
     "eslint": "^10.0.1",
diff --git a/src/checks/observability/llms-txt-coverage.ts b/src/checks/observability/llms-txt-coverage.ts
index 30dddb8..1ddf47b 100644
--- a/src/checks/observability/llms-txt-coverage.ts
+++ b/src/checks/observability/llms-txt-coverage.ts
@@ -1,11 +1,16 @@
 import { registerCheck } from '../registry.js';
 import {
-  getUrlsFromCachedLlmsTxt,
+  getUrlsFromCachedLlmsTxtWithOmitted,
   getUrlsFromSitemap,
   parseSitemapUrls,
 } from '../../helpers/get-page-urls.js';
 import { isNonPageUrl } from '../../helpers/to-md-urls.js';
 import { isLocaleSegment, hasStructuralDuplication } from '../../helpers/locale-codes.js';
+import {
+  DEFAULT_COVERAGE_PASS_THRESHOLD,
+  DEFAULT_COVERAGE_WARN_THRESHOLD,
+} from '../../constants.js';
+import picomatch from 'picomatch';
 import type { CheckContext, CheckResult } from '../../types.js';
 
 /**
@@ -41,8 +46,6 @@ export function normalizeUrlPath(url: string): string {
  */
 const EXCLUDED_PATH_PATTERNS = [
   /^\/blog(\/|$)/i,
-  /^\/changelog(\/|$)/i,
-  /^\/releases?(\/|$)/i,
   /^\/pricing(\/|$)/i,
   /^\/about(\/|$)/i,
   /^\/careers?(\/|$)/i,
@@ -51,8 +54,6 @@ const EXCLUDED_PATH_PATTERNS = [
   /^\/legal(\/|$)/i,
   /^\/privacy(\/|$)/i,
   /^\/terms(\/|$)/i,
-  /^\/security(\/|$)/i,
-  /^\/status(\/|$)/i,
   /^\/login(\/|$)/i,
   /^\/signup(\/|$)/i,
   /^\/sign-up(\/|$)/i,
@@ -76,6 +77,50 @@ export function isExcludedPath(normalizedPath: string, baseUrlPath?: string): bo
   return false;
 }
 
+/**
+ * Compile an array of glob patterns into a single case-insensitive picomatch matcher.
+ * Returns a function that is true when a URL path matches at least one pattern.
+ */
+export function compileExclusionMatcher(patterns: string[]): (path: string) => boolean {
+  if (patterns.length === 0) return () => false;
+  return picomatch(patterns, { nocase: true });
+}
+
+/**
+ * Test whether a normalized path matches any of the user-supplied exclusion globs.
+ * Patterns are tested against both the absolute path and the path relative to baseUrlPath.
+ */
+export function matchesUserExclusion(
+  normalizedPath: string,
+  matcher: (path: string) => boolean,
+  baseUrlPath?: string,
+): boolean {
+  if (matcher(normalizedPath)) return true;
+  if (baseUrlPath && baseUrlPath !== '/' && normalizedPath.startsWith(baseUrlPath)) {
+    const relative = normalizedPath.slice(baseUrlPath.length) || '/';
+    if (matcher(relative)) return true;
+  }
+  return false;
+}
+
+/**
+ * Extract lowercased directory prefixes from omitted .txt URLs (unparseable URLs are skipped).
+ * e.g. /docs/chains/ethereum/llms.txt → /docs/chains/ethereum
+ */
+export function extractOmittedPrefixes(omittedTxtUrls: string[]): string[] {
+  const prefixes: string[] = [];
+  for (const url of omittedTxtUrls) {
+    try {
+      const parsed = new URL(url);
+      const dir = parsed.pathname.replace(/\/[^/]+$/, '');
+      if (dir) prefixes.push(dir.toLowerCase());
+    } catch {
+      continue;
+    }
+  }
+  return prefixes;
+}
+
 /**
  * Detect whether a URL set uses locale-prefixed paths and, if so, return the
  * path segment position where locales appear.
@@ -197,10 +242,6 @@ export function filterToUnprefixedLocale(urls: string[], position: number): stri
   return urls.filter((url) => !hasLocaleCodeAt(url, position));
 }
 
-/** Coverage thresholds */
-const COVERAGE_PASS = 0.95;
-const COVERAGE_WARN = 0.8;
-
 /**
  * Maximum sitemap URLs to collect for coverage comparison.
  * Higher than the default MAX_SITEMAP_URLS (500) used for page sampling,
@@ -274,8 +315,28 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
   const id = 'llms-txt-coverage';
   const category = 'observability';
 
-  // 1. Get llms.txt page URLs (with progressive disclosure walking)
-  const llmsTxtUrls = await getUrlsFromCachedLlmsTxt(ctx);
+  // Resolve thresholds: CLI/config overrides → defaults, clamped to [0, 100]
+  const clamp = (v: number) => Math.max(0, Math.min(100, v));
+  const rawPass = ctx.options.coveragePassThreshold ?? DEFAULT_COVERAGE_PASS_THRESHOLD;
+  const rawWarn = ctx.options.coverageWarnThreshold ?? DEFAULT_COVERAGE_WARN_THRESHOLD;
+  const passThreshold = clamp(rawPass) / 100;
+  const warnThreshold = clamp(rawWarn) / 100;
+  const thresholdWarnings: string[] = [];
+  if (passThreshold < warnThreshold) {
+    thresholdWarnings.push(
+      `coveragePassThreshold (${clamp(rawPass)}) is lower than ` +
+        `coverageWarnThreshold (${clamp(rawWarn)}); warn state is unreachable`,
+    );
+  }
+
+  // Compile user-supplied exclusion patterns
+  const userExclusionMatcher = compileExclusionMatcher(ctx.options.coverageExclusions ?? []);
+
+  // 1. Get llms.txt page URLs + omitted subtrees (progressive disclosure)
+  const walkResult = await getUrlsFromCachedLlmsTxtWithOmitted(ctx);
+  const llmsTxtUrls = walkResult.pageUrls;
+  const omittedPrefixes = extractOmittedPrefixes(walkResult.omittedTxtUrls);
+
   if (llmsTxtUrls.length === 0) {
     return {
       id,
@@ -364,17 +425,30 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
     }
   }
 
-  // 3. Normalize both sets for comparison
+  // 3. Normalize both sets for comparison, applying exclusions:
+  //    - Built-in non-doc path patterns (blog, pricing, etc.)
+  //    - User-supplied exclusion globs (--coverage-exclusions)
+  //    - Omitted subtree prefixes (nested llms.txt indexes not walked)
   const llmsNormalized = new Set(llmsTxtUrls.map(normalizeUrlPath));
   const sitemapNormalized = new Map<string, string>(); // normalized -> original URL
+  let omittedSubtreeCount = 0;
+  let userExcludedCount = 0;
   for (const url of scopedSitemapUrls) {
     const norm = normalizeUrlPath(url);
-    if (!isExcludedPath(norm, baseUrlPath)) {
-      sitemapNormalized.set(norm, url);
+    if (isExcludedPath(norm, baseUrlPath)) continue;
+    if (matchesUserExclusion(norm, userExclusionMatcher, baseUrlPath)) {
+      userExcludedCount++;
+      continue;
     }
+    if (omittedPrefixes.length > 0 && omittedPrefixes.some((p) => norm.startsWith(p))) {
+      omittedSubtreeCount++;
+      continue;
+    }
+    sitemapNormalized.set(norm, url);
   }
 
-  const excludedCount = scopedSitemapUrls.length - sitemapNormalized.size;
+  const excludedCount =
+    scopedSitemapUrls.length - sitemapNormalized.size - omittedSubtreeCount - userExcludedCount;
 
   // 4. Missing coverage: in sitemap but not in llms.txt
   const missingFromLlmsTxt: string[] = [];
@@ -424,12 +498,12 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
   const coveragePct = Math.round(coverageRate * 100);
   const unmatchedPct = Math.round(unmatchedRate * 100);
 
-  // 7. Determine status based on coverage only
+  // 7. Determine status based on coverage and configurable thresholds
   //    Unmatched links are informational (see note in step 5)
   let overallStatus: 'pass' | 'warn' | 'fail';
-  if (coverageRate >= COVERAGE_PASS) {
+  if (coverageRate >= passThreshold) {
     overallStatus = 'pass';
-  } else if (coverageRate >= COVERAGE_WARN) {
+  } else if (coverageRate >= warnThreshold) {
     overallStatus = 'warn';
   } else {
     overallStatus = 'fail';
@@ -444,6 +518,11 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
       `llms.txt covers ${coveredCount}/${sitemapDocPages} sitemap doc pages (${coveragePct}%); ${missingFromLlmsTxt.length} missing`,
     );
   }
+  if (omittedSubtreeCount > 0) {
+    parts.push(
+      `${walkResult.omittedTxtUrls.length} nested indexes omitted (${omittedSubtreeCount} sitemap pages excluded)`,
+    );
+  }
   if (unmatchedLlmsTxtUrls.length > 0) {
     parts.push(
       `${unmatchedLlmsTxtUrls.length} llms.txt links not in sitemap (may indicate stale links or incomplete sitemap)`,
@@ -464,15 +543,25 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
       sitemapDocPages,
       sitemapSource,
       excludedNonDocPages: excludedCount,
+      ...(userExcludedCount > 0 ? { userExcludedPages: userExcludedCount } : {}),
+      ...(omittedSubtreeCount > 0
+        ? {
+            omittedSubtrees: walkResult.omittedTxtUrls.length,
+            omittedSubtreePages: omittedSubtreeCount,
+          }
+        : {}),
       ...(localeFiltered ? { localeFiltered: true, detectedLocale } : {}),
       baseUrlPath: baseUrlPath || '/',
       coverageRate: coveragePct,
+      coveragePassThreshold: Math.round(passThreshold * 100),
+      coverageWarnThreshold: Math.round(warnThreshold * 100),
       missingFromLlmsTxt: missingFromLlmsTxt.slice(0, 50),
       missingCount: missingFromLlmsTxt.length,
       unmatchedLlmsTxtUrls: unmatchedLlmsTxtUrls.slice(0, 50),
       unmatchedCount: unmatchedLlmsTxtUrls.length,
       unmatchedPct,
       sitemapWarnings,
+      ...(thresholdWarnings.length > 0 ? { thresholdWarnings } : {}),
     },
   };
 }
diff --git a/src/cli/commands/check.ts b/src/cli/commands/check.ts
index 36cd7b2..f3f6992 100644
--- a/src/cli/commands/check.ts
+++ b/src/cli/commands/check.ts
@@ -38,6 +38,12 @@ export function registerCheckCommand(program: Command): void {
     .option('-v, --verbose', 'Show per-page details for checks with issues')
     .option('--fixes', 'Show fix suggestions for warn/fail checks')
     .option('--score', 'Include scoring data in JSON output')
+    .option('--coverage-pass-threshold <n>', 'llms-txt-coverage pass threshold (0-100, default 95)')
+    .option('--coverage-warn-threshold <n>', 'llms-txt-coverage warn threshold (0-100, default 80)')
+    .option(
+      '--coverage-exclusions <patterns>',
+      'Comma-separated glob patterns to exclude from coverage denominator',
+    )
     .option(
       '--canonical-origin <url>',
       'The production domain your content links to (for preview/staging testing)',
@@ -221,6 +227,23 @@ export function registerCheckCommand(program: Command): void {
         }
       }
 
+      const coveragePassThreshold =
+        opts.coveragePassThreshold != null
+          ? parseInt(String(opts.coveragePassThreshold), 10)
+          : (config?.options?.coveragePassThreshold ?? undefined);
+      const coverageWarnThreshold =
+        opts.coverageWarnThreshold != null
+          ? parseInt(String(opts.coverageWarnThreshold), 10)
+          : (config?.options?.coverageWarnThreshold ?? undefined);
+
+      const coverageExclusions =
+        opts.coverageExclusions != null
+          ? (opts.coverageExclusions as string)
+              .split(',')
+              .map((s) => s.trim())
+              .filter(Boolean)
+          : (config?.options?.coverageExclusions ?? undefined);
+
       const report = await runChecks(url, {
         checkIds,
         skipCheckIds,
@@ -237,6 +260,9 @@ export function registerCheckCommand(program: Command): void {
         ...(preferredVersion && { preferredVersion }),
         ...(canonicalOrigin && { canonicalOrigin }),
         ...(llmsTxtUrl && { llmsTxtUrl }),
+        ...(coveragePassThreshold != null && { coveragePassThreshold }),
+        ...(coverageWarnThreshold != null && { coverageWarnThreshold }),
+        ...(coverageExclusions && { coverageExclusions }),
       });
 
       let output: string;
diff --git a/src/constants.ts b/src/constants.ts
index 1fa63d1..670e1a9 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -34,5 +34,11 @@ export const LINK_RESOLVE_THRESHOLD = 0.9;
 /** Maximum number of URLs to collect from sitemaps before stopping. */
 export const MAX_SITEMAP_URLS = 500;
 
+/** Default llms-txt-coverage pass threshold (percentage). */
+export const DEFAULT_COVERAGE_PASS_THRESHOLD = 95;
+
+/** Default llms-txt-coverage warn threshold (percentage). */
+export const DEFAULT_COVERAGE_WARN_THRESHOLD = 80;
+
 /** Base URL for the Agent-Friendly Documentation Spec. */
 export const SPEC_BASE_URL = 'https://agentdocsspec.com/spec/';
diff --git a/src/helpers/get-page-urls.ts b/src/helpers/get-page-urls.ts
index ba74302..1b6d42d 100644
--- a/src/helpers/get-page-urls.ts
+++ b/src/helpers/get-page-urls.ts
@@ -39,6 +39,13 @@ export function parseSitemapUrls(xml: string): { urls: string[]; sitemapIndexUrl
 }
 
 export async function getUrlsFromCachedLlmsTxt(ctx: CheckContext): Promise<string[]> {
+  const result = await getUrlsFromCachedLlmsTxtWithOmitted(ctx);
+  return result.pageUrls;
+}
+
+export async function getUrlsFromCachedLlmsTxtWithOmitted(
+  ctx: CheckContext,
+): Promise<AggregateWalkResult> {
   const existsResult = ctx.previousResults.get('llms-txt-exists');
   const discovered = getLlmsTxtFilesForAnalysis(existsResult);
 
@@ -85,9 +92,16 @@ function extractLinksFromLlmsTxtFiles(files: DiscoveredFile[]): string[] {
  * origin as the site being tested. This covers both sub-product llms.txt
  * files (Cloudflare) and aggregate content files (Supabase).
  */
-async function walkAggregateLinks(ctx: CheckContext, urls: string[]): Promise<string[]> {
+export interface AggregateWalkResult {
+  pageUrls: string[];
+  /** Same-origin .txt URLs found at depth 1 that the walker did not descend into. */
+  omittedTxtUrls: string[];
+}
+
+async function walkAggregateLinks(ctx: CheckContext, urls: string[]): Promise<AggregateWalkResult> {
   const pageUrls: string[] = [];
   const aggregateUrls: string[] = [];
+  const omittedTxtUrls: string[] = [];
 
   const siteOrigin = ctx.effectiveOrigin ?? ctx.origin;
 
@@ -110,7 +124,7 @@ async function walkAggregateLinks(ctx: CheckContext, urls: string[]): Promise<st
     }
   }
 
-  if (aggregateUrls.length === 0) return pageUrls;
+  if (aggregateUrls.length === 0) return { pageUrls, omittedTxtUrls };
 
   // Fetch aggregate files and extract their links
   for (const aggUrl of aggregateUrls) {
@@ -133,13 +147,15 @@ async function walkAggregateLinks(ctx: CheckContext, urls: string[]): Promise<st
       const subUrls = extractLinksFromLlmsTxtFiles([subFile]);
 
       for (const subUrl of subUrls) {
-        // Only keep same-origin page URLs (skip further .txt nesting)
         try {
           const parsed = new URL(subUrl);
-          if (
-            (parsed.origin === ctx.origin || parsed.origin === siteOrigin) &&
-            !isNonPageUrl(subUrl)
-          ) {
+          const isSameOrigin = parsed.origin === ctx.origin || parsed.origin === siteOrigin;
+          if (!isSameOrigin) continue;
+
+          if (/\.txt$/i.test(parsed.pathname)) {
+            // Depth-1 .txt link: record as omitted rather than descending
+            omittedTxtUrls.push(subUrl);
+          } else if (!isNonPageUrl(subUrl)) {
             pageUrls.push(subUrl);
           }
         } catch {
@@ -151,7 +167,7 @@ async function walkAggregateLinks(ctx: CheckContext, urls: string[]): Promise<st
     }
   }
 
-  return pageUrls;
+  return { pageUrls, omittedTxtUrls };
 }
 
 /**
@@ -198,7 +214,8 @@ async function fetchLlmsTxtUrls(ctx: CheckContext): Promise<string[]> {
   const canonical = selectCanonicalLlmsTxt(discovered, ctx.baseUrl);
   const filesForAnalysis = canonical ? [canonical] : [];
   const urls = extractLinksFromLlmsTxtFiles(filesForAnalysis);
-  return walkAggregateLinks(ctx, urls);
+  const result = await walkAggregateLinks(ctx, urls);
+  return result.pageUrls;
 }
 
 /**
diff --git a/src/scoring/resolutions.ts b/src/scoring/resolutions.ts
index 254ce75..b42eeca 100644
--- a/src/scoring/resolutions.ts
+++ b/src/scoring/resolutions.ts
@@ -275,15 +275,22 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
   'llms-txt-coverage': {
     warn: (d) => {
       const missing = (d.missingCount as number) ?? 0;
+      const coverage = (d.coverageRate as number) ?? 0;
+      const warnThreshold = (d.coverageWarnThreshold as number) ?? 80;
+      const passThreshold = (d.coveragePassThreshold as number) ?? 95;
       return (
-        `Your llms.txt covers 80-95% of your site's pages. ${missing} live ` +
+        `Your llms.txt covers ${coverage}% of your site's pages ` +
+        `(${warnThreshold}-${passThreshold}% is warn). ${missing} live ` +
         'pages are not represented in the index.'
       );
     },
     fail: (d) => {
       const missing = (d.missingCount as number) ?? 0;
+      const coverage = (d.coverageRate as number) ?? 0;
+      const warnThreshold = (d.coverageWarnThreshold as number) ?? 80;
       return (
-        `Your llms.txt covers less than 80% of your site's pages. ` +
+        `Your llms.txt covers ${coverage}% of your site's pages ` +
+        `(below ${warnThreshold}% threshold). ` +
         `${missing} live pages are missing from the index. Regenerate ` +
         'llms.txt from your sitemap or build pipeline.'
       );
diff --git a/src/types.ts b/src/types.ts
index 1d77fb5..792517f 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -84,6 +84,12 @@ export interface CheckOptions {
   preferredVersion?: string;
   /** Canonical origin to rewrite in fetched content (for preview/staging testing). */
   canonicalOrigin?: string;
+  /** Pass threshold for llms-txt-coverage (0–100). Default 95. */
+  coveragePassThreshold?: number;
+  /** Warn threshold for llms-txt-coverage (0–100). Default 80. */
+  coverageWarnThreshold?: number;
+  /** Glob patterns to exclude from the sitemap before calculating coverage. */
+  coverageExclusions?: string[];
   /**
    * Explicit URL to use as the canonical llms.txt for downstream sampling and
    * analysis. When set, the standard candidate-discovery heuristic is bypassed
diff --git a/test/unit/checks/llms-txt-coverage.test.ts b/test/unit/checks/llms-txt-coverage.test.ts
index e7082d6..72412fb 100644
--- a/test/unit/checks/llms-txt-coverage.test.ts
+++ b/test/unit/checks/llms-txt-coverage.test.ts
@@ -7,6 +7,8 @@ import type { DiscoveredFile } from '../../../src/types.js';
 import {
   hasLocaleCodeAt,
   filterToUnprefixedLocale,
+  compileExclusionMatcher,
+  extractOmittedPrefixes,
 } from '../../../src/checks/observability/llms-txt-coverage.js';
 
 const server = setupServer();
@@ -360,13 +362,12 @@ describe('llms-txt-coverage', () => {
     expect(result.details?.coverageRate).toBe(100);
   });
 
-  test('excludes blog/changelog/pricing paths from sitemap comparison', async () => {
+  test('excludes blog/pricing/careers paths from sitemap comparison', async () => {
     const host = 'cov-exclude.local';
     const docPages = [`http://${host}/guide`];
     const sitemapPages = [
       `http://${host}/guide`,
       `http://${host}/blog/post-1`,
-      `http://${host}/changelog/v2`,
       `http://${host}/pricing`,
       `http://${host}/careers/engineer`,
     ];
@@ -392,7 +393,7 @@ describe('llms-txt-coverage', () => {
     const result = await check.run(ctx);
     // Only /guide should be in the doc pages set (others excluded)
     expect(result.details?.sitemapDocPages).toBe(1);
-    expect(result.details?.excludedNonDocPages).toBe(4);
+    expect(result.details?.excludedNonDocPages).toBe(3);
     expect(result.status).toBe('pass');
   });
 
@@ -724,12 +725,12 @@ describe('llms-txt-coverage', () => {
     const host = 'basepath-exclude.local';
     const pages = [`http://${host}/docs/getting-started`, `http://${host}/docs/api-reference`];
 
-    // Sitemap includes /docs/changelog pages that should be excluded
+    // Sitemap includes /docs/blog and /docs/pricing pages that should be excluded
     const sitemapPages = [
       ...pages,
-      `http://${host}/docs/changelog/2024-01-01`,
-      `http://${host}/docs/changelog/2024-02-01`,
       `http://${host}/docs/blog/post-1`,
+      `http://${host}/docs/blog/post-2`,
+      `http://${host}/docs/pricing`,
     ];
 
     const ctx = makeCtx(host, pages, '/docs');
@@ -750,7 +751,7 @@ describe('llms-txt-coverage', () => {
 
     const result = await check.run(ctx);
     expect(result.status).toBe('pass');
-    // Only 2 doc pages remain after excluding /docs/changelog and /docs/blog
+    // Only 2 doc pages remain after excluding /docs/blog and /docs/pricing
     expect(result.details?.sitemapDocPages).toBe(2);
     expect(result.details?.excludedNonDocPages).toBe(3);
   });
@@ -795,3 +796,312 @@ describe('filterToUnprefixedLocale', () => {
     expect(filterToUnprefixedLocale(urls, 1)).toEqual(urls);
   });
 });
+
+describe('configurable thresholds', () => {
+  test('uses custom pass threshold', async () => {
+    const host = 'cov-custom-pass.local';
+    // 9 of 10 pages = 90% coverage. Default would warn, but pass=80 makes it pass.
+    const allPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
+    const llmsPages = allPages.slice(0, 9);
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+    ctx.options.coveragePassThreshold = 80;
+    ctx.options.coverageWarnThreshold = 50;
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(allPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(90);
+    expect(result.details?.coveragePassThreshold).toBe(80);
+    expect(result.details?.coverageWarnThreshold).toBe(50);
+  });
+
+  test('threshold of 0 makes check informational (always passes)', async () => {
+    const host = 'cov-informational.local';
+    // Only 2 of 10 pages = 20% coverage. With thresholds at 0, this passes.
+    const allPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
+    const llmsPages = allPages.slice(0, 2);
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+    ctx.options.coveragePassThreshold = 0;
+    ctx.options.coverageWarnThreshold = 0;
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(allPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(20);
+  });
+});
+
+describe('coverage exclusions', () => {
+  test('user exclusion patterns remove matching sitemap URLs from denominator', async () => {
+    const host = 'cov-exclusions.local';
+    const docPages = [`http://${host}/docs/guide`, `http://${host}/docs/api`];
+    const sitemapPages = [
+      ...docPages,
+      `http://${host}/docs/reference/v1/endpoint-a`,
+      `http://${host}/docs/reference/v1/endpoint-b`,
+      `http://${host}/docs/reference/v2/endpoint-a`,
+    ];
+
+    const ctx = makeCtx(host, docPages, '/docs');
+    ctx.options.coverageExclusions = ['/docs/reference/**'];
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.sitemapDocPages).toBe(2);
+    expect(result.details?.userExcludedPages).toBe(3);
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('exclusion patterns work relative to base path', async () => {
+    const host = 'cov-exclusions-rel.local';
+    const docPages = [`http://${host}/docs/guide`];
+    const sitemapPages = [...docPages, `http://${host}/docs/archive/old-page`];
+
+    const ctx = makeCtx(host, docPages, '/docs');
+    ctx.options.coverageExclusions = ['/archive/**'];
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.sitemapDocPages).toBe(1);
+    expect(result.details?.userExcludedPages).toBe(1);
+  });
+});
+
+describe('omitted subtrees', () => {
+  test('excludes sitemap pages under omitted subtree prefixes', async () => {
+    const host = 'cov-omitted.local';
+    // Root llms.txt links to section indexes (depth 0)
+    const rootLlmsTxt = [
+      '# Docs\n',
+      `- [Chains](http://${host}/docs/chains/llms.txt)`,
+      `- [Intro](http://${host}/docs/intro)`,
+    ].join('\n');
+
+    // chains/llms.txt links to sub-section indexes (depth 1, omitted) + pages
+    const chainsLlmsTxt = [
+      '# Chains\n',
+      `- [Ethereum](http://${host}/docs/chains/ethereum/llms.txt)`,
+      `- [Solana](http://${host}/docs/chains/solana/llms.txt)`,
+      `- [Overview](http://${host}/docs/chains/overview)`,
+    ].join('\n');
+
+    // Sitemap has pages under the omitted subtrees
+    const sitemapPages = [
+      `http://${host}/docs/intro`,
+      `http://${host}/docs/chains/overview`,
+      `http://${host}/docs/chains/ethereum/method-a`,
+      `http://${host}/docs/chains/ethereum/method-b`,
+      `http://${host}/docs/chains/solana/method-a`,
+    ];
+
+    const baseUrl = `http://${host}/docs`;
+    const ctx = createContext(baseUrl, { requestDelay: 0 });
+    const discovered: DiscoveredFile[] = [
+      { url: `http://${host}/llms.txt`, content: rootLlmsTxt, status: 200, redirected: false },
+    ];
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'content-discoverability',
+      status: 'pass',
+      message: 'Found',
+      details: { discoveredFiles: discovered },
+    });
+
+    server.use(
+      // Depth-0 aggregate fetch: chains/llms.txt
+      http.get(
+        `http://${host}/docs/chains/llms.txt`,
+        () =>
+          new HttpResponse(chainsLlmsTxt, {
+            status: 200,
+            headers: { 'content-type': 'text/plain' },
+          }),
+      ),
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    // Pages directly verified: /docs/intro, /docs/chains/overview = 2
+    // Omitted subtrees: /docs/chains/ethereum (2 pages), /docs/chains/solana (1 page) = 3 excluded
+    // Coverage: 2/2 = 100%
+    expect(result.status).toBe('pass');
+    expect(result.details?.sitemapDocPages).toBe(2);
+    expect(result.details?.omittedSubtrees).toBe(2);
+    expect(result.details?.omittedSubtreePages).toBe(3);
+    expect(result.details?.coverageRate).toBe(100);
+    expect(result.message).toContain('nested indexes omitted');
+  });
+
+  test('omitted subtrees without matching sitemap pages do not affect results', async () => {
+    const host = 'cov-omitted-empty.local';
+    const rootLlmsTxt = [
+      '# Docs\n',
+      `- [Section](http://${host}/docs/section/llms.txt)`,
+      `- [Guide](http://${host}/docs/guide)`,
+    ].join('\n');
+
+    const sectionLlmsTxt = [
+      '# Section\n',
+      `- [SubSection](http://${host}/docs/section/sub/llms.txt)`,
+      `- [Page](http://${host}/docs/section/page)`,
+    ].join('\n');
+
+    const sitemapPages = [`http://${host}/docs/guide`, `http://${host}/docs/section/page`];
+
+    const baseUrl = `http://${host}/docs`;
+    const ctx = createContext(baseUrl, { requestDelay: 0 });
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'content-discoverability',
+      status: 'pass',
+      message: 'Found',
+      details: {
+        discoveredFiles: [
+          { url: `http://${host}/llms.txt`, content: rootLlmsTxt, status: 200, redirected: false },
+        ],
+      },
+    });
+
+    server.use(
+      http.get(
+        `http://${host}/docs/section/llms.txt`,
+        () =>
+          new HttpResponse(sectionLlmsTxt, {
+            status: 200,
+            headers: { 'content-type': 'text/plain' },
+          }),
+      ),
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(100);
+    // Omitted subtree /docs/section/sub has no matching sitemap pages
+    expect(result.details?.omittedSubtreePages ?? 0).toBe(0);
+  });
+});
+
+describe('compileExclusionMatcher', () => {
+  test('matches ** across segments', () => {
+    const matcher = compileExclusionMatcher(['/docs/reference/**']);
+    expect(matcher('/docs/reference/v1/endpoint')).toBe(true);
+    expect(matcher('/docs/reference')).toBe(true);
+    expect(matcher('/docs/guide')).toBe(false);
+  });
+
+  test('matches * within a segment', () => {
+    const matcher = compileExclusionMatcher(['/docs/v*/api']);
+    expect(matcher('/docs/v1/api')).toBe(true);
+    expect(matcher('/docs/v2/api')).toBe(true);
+    expect(matcher('/docs/v1/guide')).toBe(false);
+  });
+
+  test('multiple patterns', () => {
+    const matcher = compileExclusionMatcher(['/docs/changelog/**', '/docs/blog/**']);
+    expect(matcher('/docs/changelog/v1')).toBe(true);
+    expect(matcher('/docs/blog/post-1')).toBe(true);
+    expect(matcher('/docs/guide')).toBe(false);
+  });
+
+  test('empty patterns never match', () => {
+    const matcher = compileExclusionMatcher([]);
+    expect(matcher('/docs/anything')).toBe(false);
+  });
+});
+
+describe('extractOmittedPrefixes', () => {
+  test('extracts directory from .txt URLs', () => {
+    const prefixes = extractOmittedPrefixes([
+      'http://example.com/docs/chains/ethereum/llms.txt',
+      'http://example.com/docs/chains/solana/llms.txt',
+    ]);
+    expect(prefixes).toEqual(['/docs/chains/ethereum', '/docs/chains/solana']);
+  });
+
+  test('returns empty for empty input', () => {
+    expect(extractOmittedPrefixes([])).toEqual([]);
+  });
+});

From 77b7e7215582a0c5789a69305d40d0788e3283e8 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sat, 25 Apr 2026 15:51:31 -0400
Subject: [PATCH 04/13] Split 'llms-txt-directive' into 'html' and 'md'
 variants

---
 README.md                                     |   8 +-
 SCORING.md                                    |  21 +-
 docs/about.md                                 |   2 +-
 docs/agent-score-calculation.md               |  56 ++--
 docs/checks/content-discoverability.md        |  64 ++--
 docs/checks/index.md                          |   2 +-
 docs/ci-integration.md                        |   2 +-
 docs/improve-your-score.md                    |  10 +-
 docs/index.md                                 |   2 +-
 docs/interaction-diagnostics.md               |  14 +-
 docs/quick-start.md                           |   6 +-
 docs/reference/config-file.md                 |   2 +-
 docs/reference/scoring-api.md                 |   2 +-
 docs/what-is-agent-score.md                   |   6 +-
 scoring-reference.md                          |  78 +++--
 .../llms-txt-directive-html.ts                | 193 ++++++++++++
 .../llms-txt-directive-md.ts                  | 202 ++++++++++++
 .../llms-txt-directive.ts                     | 233 --------------
 src/checks/index.ts                           |   3 +-
 src/cli/formatters/text.ts                    |  16 +-
 src/scoring/coefficients.ts                   |   5 +-
 src/scoring/diagnostics.ts                    |  60 +++-
 src/scoring/proportions.ts                    |   3 +-
 src/scoring/resolutions.ts                    |  28 +-
 src/scoring/tag-scores.ts                     |   9 +-
 src/scoring/weights.ts                        |   3 +-
 test/integration/cli.test.ts                  |   2 +-
 ...est.ts => llms-txt-directive-html.test.ts} | 122 ++++----
 .../unit/checks/llms-txt-directive-md.test.ts | 287 ++++++++++++++++++
 test/unit/cli/formatters.test.ts              |   4 +-
 test/unit/scoring/coefficients.test.ts        |  24 +-
 test/unit/scoring/diagnostics.test.ts         |  95 +++++-
 test/unit/scoring/proportions.test.ts         |   6 +-
 test/unit/scoring/resolutions.test.ts         |   3 +-
 test/unit/scoring/score.test.ts               |  14 +-
 test/unit/scoring/tag-scores.test.ts          |  16 +-
 test/unit/scoring/weights.test.ts             |  14 +-
 37 files changed, 1152 insertions(+), 465 deletions(-)
 create mode 100644 src/checks/content-discoverability/llms-txt-directive-html.ts
 create mode 100644 src/checks/content-discoverability/llms-txt-directive-md.ts
 delete mode 100644 src/checks/content-discoverability/llms-txt-directive.ts
 rename test/unit/checks/{llms-txt-directive.test.ts => llms-txt-directive-html.test.ts} (81%)
 create mode 100644 test/unit/checks/llms-txt-directive-md.test.ts

diff --git a/README.md b/README.md
index 6fbe7ce..03f59f8 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
   <a href="https://www.npmjs.com/package/afdocs"><img src="https://img.shields.io/npm/v/afdocs" alt="npm"></a>
 </p>
 
-Test your documentation site against the [Agent-Friendly Documentation Spec](https://agentdocsspec.com). AFDocs runs 22 checks across 7 categories to measure how well AI coding agents can discover, navigate, and consume your docs.
+Test your documentation site against the [Agent-Friendly Documentation Spec](https://agentdocsspec.com). AFDocs runs 23 checks across 7 categories to measure how well AI coding agents can discover, navigate, and consume your docs.
 
 Powering [Agent Score](https://buildwithfern.com/agent-score) by Fern.
 
@@ -45,8 +45,8 @@ Agent-Friendly Docs Scorecard
       PASS  llms-txt-exists        llms.txt found at /llms.txt
       WARN  llms-txt-size          llms.txt is 65,000 characters
             Fix: If it grows further, split into nested llms.txt files ...
-      FAIL  llms-txt-directive     No directive detected on any tested page
-            Fix: Add a blockquote near the top of each page ...
+      FAIL  llms-txt-directive-html No directive detected in HTML of any tested page
+            Fix: Add a visually-hidden element near the top of each page ...
 ```
 
 ## Install
@@ -69,7 +69,7 @@ Full documentation is available at **[afdocs.dev](https://afdocs.dev)**:
 
 - [Understand Your Score](https://afdocs.dev/what-is-agent-score) — what the score means and how it's calculated
 - [Improve Your Score](https://afdocs.dev/improve-your-score) — prioritized fix guide
-- [Checks Reference](https://afdocs.dev/checks/) — all 22 checks with fix suggestions
+- [Checks Reference](https://afdocs.dev/checks/) — all 23 checks with fix suggestions
 - [CLI Reference](https://afdocs.dev/reference/cli) — flags, output formats, sampling strategies
 - [CI Integration](https://afdocs.dev/ci-integration) — vitest helpers for your pipeline
 - [Programmatic API](https://afdocs.dev/reference/programmatic-api) — TypeScript API for custom tooling
diff --git a/SCORING.md b/SCORING.md
index 9e57247..85879fa 100644
--- a/SCORING.md
+++ b/SCORING.md
@@ -4,7 +4,7 @@ Scoring Version: 0.1.0 · [Agent-Friendly Docs Spec v0.3.0](https://agentdocsspe
 
 ## What is this score?
 
-The Agent-Friendly Docs Scorecard measures how effectively AI coding agents can discover, navigate, and consume a documentation site. It runs 22 automated checks against your site and produces a 0–100 score with a letter grade.
+The Agent-Friendly Docs Scorecard measures how effectively AI coding agents can discover, navigate, and consume a documentation site. It runs 23 automated checks against your site and produces a 0–100 score with a letter grade.
 
 Each check corresponds to a section of the [Agent-Friendly Docs Spec](https://agentdocsspec.com), which documents what the check measures, why it matters for real agent workflows, and the observed behaviors that motivated it. This document covers how checks are **scored**, not what they **measure**. If you want to understand a specific check in depth, follow the spec links in the table below.
 
@@ -23,7 +23,7 @@ The score reflects how well agents can _actually use_ your documentation, not ju
 
 ## What we check
 
-The 22 checks are grouped into seven categories. Each check is assigned a **weight tier** based on its observed impact on agent workflows:
+The 23 checks are grouped into seven categories. Each check is assigned a **weight tier** based on its observed impact on agent workflows:
 
 - **Critical (10 pts)**: Agents cannot function without this. Failure means zero content, zero navigation, or zero access.
 - **High (7 pts)**: Directly limits agent effectiveness. Failure means truncation, dead ends, or agents stuck on a worse path.
@@ -41,7 +41,8 @@ How agents find and navigate your documentation.
 | [llms-txt-size](https://agentdocsspec.com/spec/#llms-txt-size)                     | High (7)      | Whether your llms.txt fits within agent context windows. Truncated indexes defeat their purpose.                         |
 | [llms-txt-links-resolve](https://agentdocsspec.com/spec/#llms-txt-links-resolve)   | High (7)      | Whether links in your llms.txt actually work. Broken links send agents down dead ends with high confidence.              |
 | [llms-txt-links-markdown](https://agentdocsspec.com/spec/#llms-txt-links-markdown) | High (7)      | Whether llms.txt links point to markdown rather than HTML. Agents work significantly less effectively with HTML content. |
-| [llms-txt-directive](https://agentdocsspec.com/spec/#llms-txt-directive)           | High (7)      | Whether your docs pages tell agents where to find llms.txt. Without this, agents won't know it exists.                   |
+| [llms-txt-directive-html](https://agentdocsspec.com/spec/#llms-txt-directive-html) | High (7)      | Whether your HTML pages tell agents where to find llms.txt. Without this, agents won't know it exists.                   |
+| [llms-txt-directive-md](https://agentdocsspec.com/spec/#llms-txt-directive-md)     | Medium (4)    | Whether your markdown pages tell agents where to find llms.txt.                                                          |
 
 ### Markdown Availability
 
@@ -129,7 +130,7 @@ Not all warnings represent the same degree of degradation. A warning on `llms-tx
 | Coefficient | Meaning                                  | Checks                                                                                                                                                                                                                                                                                 |
 | ----------- | ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | **0.75**    | Content substantively intact             | `llms-txt-valid`, `content-negotiation`, `llms-txt-links-resolve`, `llms-txt-coverage`, `markdown-content-parity`                                                                                                                                                                      |
-| **0.60**    | Partial coverage or platform-dependent   | `llms-txt-directive`, `redirect-behavior`                                                                                                                                                                                                                                              |
+| **0.60**    | Partial coverage or platform-dependent   | `llms-txt-directive-html`, `llms-txt-directive-md`, `redirect-behavior`                                                                                                                                                                                                                |
 | **0.50**    | Genuine functional degradation           | `llms-txt-exists`, `llms-txt-size`, `rendering-strategy`, `markdown-url-support`, `page-size-markdown`, `page-size-html`, `content-start-position`, `tabbed-content-serialization`, `section-header-quality`, `cache-header-hygiene`, `auth-gate-detection`, `auth-alternative-access` |
 | **0.25**    | Actively steering agents to a worse path | `llms-txt-links-markdown` (markdown exists but llms.txt links to HTML; agents don't discover .md variants on their own)                                                                                                                                                                |
 
@@ -160,11 +161,19 @@ Some problems only become visible when you look at multiple checks together. The
 
 ### Markdown support is undiscoverable
 
-**Triggers when** your site serves markdown at .md URLs, but none of the discovery mechanisms (content negotiation, llms.txt directive, .md links in llms.txt) are in place.
+**Triggers when** your site serves markdown at .md URLs, but there is no agent-facing directive on HTML pages pointing to llms.txt and the server does not support content negotiation.
 
 **What it means**: You've done the work to support markdown, but agents have no way to find out. They'll default to the HTML path. In observed agent behavior, agents do not independently discover .md URL variants; they need to be told.
 
-**What to do**: Add a directive on your docs pages pointing to llms.txt, or implement content negotiation for `Accept: text/markdown`. Either change makes your existing markdown support visible to agents.
+**What to do**: Add a directive on your docs pages pointing to llms.txt, and implement content negotiation for `Accept: text/markdown`. The directive is the primary discovery mechanism because it reaches all agents; content negotiation provides a fast path for agents that request markdown by default. Both are recommended.
+
+### Markdown support is only partially discoverable
+
+**Triggers when** your site serves markdown at .md URLs and supports content negotiation, but there is no agent-facing directive on HTML pages pointing to llms.txt.
+
+**What it means**: Agents that send `Accept: text/markdown` (Claude Code, Cursor, OpenCode) get markdown automatically, but the majority of agents fetch HTML by default and have no signal that a markdown path exists.
+
+**What to do**: Add a directive near the top of each HTML page pointing to your llms.txt. If your site serves markdown, mention that in the directive too. The directive reaches all agents, not just the ones that request markdown by default.
 
 ### Truncated index
 
diff --git a/docs/about.md b/docs/about.md
index 97b1e68..a1d8ff8 100644
--- a/docs/about.md
+++ b/docs/about.md
@@ -3,7 +3,7 @@
 <div class="about-intro">
 <div class="about-text">
 
-AFDocs is an open-source tool that tests documentation sites against the [Agent-Friendly Documentation Spec](https://agentdocsspec.com). The spec defines what makes documentation accessible to AI coding agents, based on observed behavior across real agent platforms. AFDocs automates those observations into 22 checks that produce a score and actionable fix suggestions.
+AFDocs is an open-source tool that tests documentation sites against the [Agent-Friendly Documentation Spec](https://agentdocsspec.com). The spec defines what makes documentation accessible to AI coding agents, based on observed behavior across real agent platforms. AFDocs automates those observations into 23 checks that produce a score and actionable fix suggestions.
 
 </div>
 <div class="about-logo">
diff --git a/docs/agent-score-calculation.md b/docs/agent-score-calculation.md
index 640f4ab..99f020e 100644
--- a/docs/agent-score-calculation.md
+++ b/docs/agent-score-calculation.md
@@ -1,6 +1,6 @@
 # Score Calculation
 
-The Agent Score is a weighted average of 22 check results, adjusted for interaction effects between checks. This page covers the mechanics: how checks are weighted, how multi-page results are scored proportionally, and how the system handles cases where checks influence each other.
+The Agent Score is a weighted average of 23 check results, adjusted for interaction effects between checks. This page covers the mechanics: how checks are weighted, how multi-page results are scored proportionally, and how the system handles cases where checks influence each other.
 
 Each check corresponds to a section of the [Agent-Friendly Documentation Spec](https://agentdocsspec.com), which documents what the check measures and the observed agent behaviors that motivated it. For what each check measures, see the [Checks Reference](/checks/).
 
@@ -37,7 +37,8 @@ Every check is assigned a weight tier based on its observed impact on agent work
 | `llms-txt-size`           | High (7)      |
 | `llms-txt-links-resolve`  | High (7)      |
 | `llms-txt-links-markdown` | High (7)      |
-| `llms-txt-directive`      | High (7)      |
+| `llms-txt-directive-html` | High (7)      |
+| `llms-txt-directive-md`   | Medium (4)    |
 
 ### Markdown Availability
 
@@ -93,24 +94,25 @@ Checks that test multiple pages use proportional scoring. If `page-size-html` te
 
 These checks sample pages from your site and score based on the pass rate across those pages:
 
-| Check                          | What's measured per page                                    |
-| ------------------------------ | ----------------------------------------------------------- |
-| `rendering-strategy`           | Whether the page is server-rendered or an SPA shell         |
-| `page-size-html`               | Whether the HTML-to-text conversion fits within size limits |
-| `page-size-markdown`           | Whether the markdown version fits within size limits        |
-| `content-start-position`       | How far into the response actual content begins             |
-| `content-negotiation`          | Whether the server returns markdown for this page           |
-| `markdown-url-support`         | Whether the `.md` URL variant returns markdown              |
-| `http-status-codes`            | Whether a fabricated bad URL returns a proper 404           |
-| `redirect-behavior`            | Whether redirects use standard HTTP methods                 |
-| `auth-gate-detection`          | Whether the page is publicly accessible                     |
-| `llms-txt-directive`           | Whether the page includes a directive pointing to llms.txt  |
-| `tabbed-content-serialization` | Whether tabbed content creates oversized output             |
-| `section-header-quality`       | Whether tab section headers include variant context         |
-| `markdown-code-fence-validity` | Whether code fences are properly closed                     |
-| `markdown-content-parity`      | Whether markdown and HTML versions match                    |
-| `cache-header-hygiene`         | Whether cache headers allow timely updates                  |
-| `auth-alternative-access`      | Whether auth-gated pages have alternative access paths      |
+| Check                          | What's measured per page                                            |
+| ------------------------------ | ------------------------------------------------------------------- |
+| `rendering-strategy`           | Whether the page is server-rendered or an SPA shell                 |
+| `page-size-html`               | Whether the HTML-to-text conversion fits within size limits         |
+| `page-size-markdown`           | Whether the markdown version fits within size limits                |
+| `content-start-position`       | How far into the response actual content begins                     |
+| `content-negotiation`          | Whether the server returns markdown for this page                   |
+| `markdown-url-support`         | Whether the `.md` URL variant returns markdown                      |
+| `http-status-codes`            | Whether a fabricated bad URL returns a proper 404                   |
+| `redirect-behavior`            | Whether redirects use standard HTTP methods                         |
+| `auth-gate-detection`          | Whether the page is publicly accessible                             |
+| `llms-txt-directive-html`      | Whether the HTML page includes a directive pointing to llms.txt     |
+| `llms-txt-directive-md`        | Whether the markdown page includes a directive pointing to llms.txt |
+| `tabbed-content-serialization` | Whether tabbed content creates oversized output                     |
+| `section-header-quality`       | Whether tab section headers include variant context                 |
+| `markdown-code-fence-validity` | Whether code fences are properly closed                             |
+| `markdown-content-parity`      | Whether markdown and HTML versions match                            |
+| `cache-header-hygiene`         | Whether cache headers allow timely updates                          |
+| `auth-alternative-access`      | Whether auth-gated pages have alternative access paths              |
 
 ### Single-resource checks (all-or-nothing)
 
@@ -134,7 +136,7 @@ A warning is not a binary "half credit." Different warnings represent different
 | Coefficient | Meaning                                  | Checks                                                                                                                                                                                                                                                                                 |
 | ----------- | ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | **0.75**    | Content substantively intact             | `llms-txt-valid`, `content-negotiation`, `llms-txt-links-resolve`, `llms-txt-coverage`, `markdown-content-parity`                                                                                                                                                                      |
-| **0.60**    | Partial coverage or platform-dependent   | `llms-txt-directive`, `redirect-behavior`                                                                                                                                                                                                                                              |
+| **0.60**    | Partial coverage or platform-dependent   | `llms-txt-directive-html`, `llms-txt-directive-md`, `redirect-behavior`                                                                                                                                                                                                                |
 | **0.50**    | Genuine functional degradation           | `llms-txt-exists`, `llms-txt-size`, `rendering-strategy`, `markdown-url-support`, `page-size-markdown`, `page-size-html`, `content-start-position`, `tabbed-content-serialization`, `section-header-quality`, `cache-header-hygiene`, `auth-gate-detection`, `auth-alternative-access` |
 | **0.25**    | Actively steering agents to a worse path | `llms-txt-links-markdown` (markdown exists but llms.txt links to HTML)                                                                                                                                                                                                                 |
 
@@ -165,12 +167,12 @@ Some checks only matter if agents can actually reach the content they measure. I
 
 These checks measure markdown path quality. But if agents can't discover that path, the quality is irrelevant.
 
-| Condition                   | Coefficient | Why                                                             |
-| --------------------------- | ----------- | --------------------------------------------------------------- |
-| Content negotiation passes  | 1.0         | Agents that request it get markdown automatically.              |
-| llms.txt directive passes   | 0.8         | Effective, but agents sometimes ignore the directive.           |
-| llms.txt links use .md URLs | 0.5         | Agents must find llms.txt first, then follow .md links.         |
-| None of the above           | 0.0         | Agents won't find the markdown path. Check excluded from score. |
+| Condition                                                   | Coefficient | Why                                                             |
+| ----------------------------------------------------------- | ----------- | --------------------------------------------------------------- |
+| Content negotiation passes                                  | 1.0         | Agents that request it get markdown automatically.              |
+| `llms-txt-directive-html` or `llms-txt-directive-md` passes | 0.8         | Effective, but agents sometimes ignore the directive.           |
+| llms.txt links use .md URLs                                 | 0.5         | Agents must find llms.txt first, then follow .md links.         |
+| None of the above                                           | 0.0         | Agents won't find the markdown path. Check excluded from score. |
 
 If multiple conditions are met, the highest coefficient applies.
 
diff --git a/docs/checks/content-discoverability.md b/docs/checks/content-discoverability.md
index b2d2fc8..a5dd8dd 100644
--- a/docs/checks/content-discoverability.md
+++ b/docs/checks/content-discoverability.md
@@ -2,7 +2,7 @@
 
 How agents find and navigate your documentation. This category covers `llms.txt`: whether it exists, whether agents can parse it, whether the links work, and whether agents visiting individual pages know it's there.
 
-These six checks carry the most combined weight of any category. Without discoverability, everything else is less useful.
+These seven checks carry the most combined weight of any category. Without discoverability, everything else is less useful.
 
 ## llms-txt-exists
 
@@ -198,32 +198,62 @@ A warning on this check carries a bigger penalty in scoring because it means mar
 
 ---
 
-## llms-txt-directive
+## llms-txt-directive-html
 
-Whether your documentation pages tell agents where to find `llms.txt`.
+Whether the HTML version of your documentation pages tells agents where to find `llms.txt`.
 
-|            |                                                                          |
-| ---------- | ------------------------------------------------------------------------ |
-| **Weight** | High (7)                                                                 |
-| **Spec**   | [llms-txt-directive](https://agentdocsspec.com/spec/#llms-txt-directive) |
+|            |                                                                                    |
+| ---------- | ---------------------------------------------------------------------------------- |
+| **Weight** | High (7)                                                                           |
+| **Spec**   | [llms-txt-directive-html](https://agentdocsspec.com/spec/#llms-txt-directive-html) |
 
 ### Why it matters
 
-Agents don't know to look for `llms.txt` by default. When they land on an individual documentation page, they have no way to discover that a navigation index exists unless the page tells them. A blockquote directive near the top of each page is the agent equivalent of a "You Are Here" marker.
+Agents that fetch the HTML version of your pages have no built-in way to discover that a documentation index exists at `/llms.txt`. An in-page directive serves as an agent "You Are Here" marker that points them to the index. The directive can be visually hidden (e.g., using a CSS clip-rect technique or `sr-only` class) as long as it remains in the DOM and survives HTML-to-markdown conversion.
 
-In practice, agents that see the directive can follow it and use the index to navigate. In testing, Anthropic's Claude Code documentation used this pattern, and it worked reliably for Claude agents. However, agents are non-deterministic, and platforms implement functionality in different ways, so efficacy may vary across agents. This is more of a suggestion than a guarantee.
+This check searches within the HTML `<body>` but excludes `<nav>`, `<script>`, and `<style>` elements. Navigation sidebar links to pages _about_ llms.txt (e.g., a docs page describing the llms.txt feature) are not counted as directives.
 
 ### Results
 
-| Result | Condition                                                                  |
-| ------ | -------------------------------------------------------------------------- |
-| Pass   | Directive found on all or nearly all documentation pages, near the top     |
-| Warn   | Found on some pages but missing from others, or buried past 50% of content |
-| Fail   | Not detected on any tested page                                            |
+| Result | Condition                                                                      |
+| ------ | ------------------------------------------------------------------------------ |
+| Pass   | Directive found in HTML of all or nearly all documentation pages, near the top |
+| Warn   | Found on some pages but missing from others, or buried past 50% of content     |
+| Fail   | Not detected in the HTML of any tested page                                    |
 
 ### How to fix
 
-Add a blockquote near the top of each documentation page pointing to your `llms.txt`. For example:
+Add a visually-hidden element near the top of each page (e.g., a `<div>` with CSS clip-rect) containing a link to your `llms.txt`. If your site serves markdown versions of pages, mention that in the directive too so agents know to request it. This can be added through your docs platform's page template or layout component. Use server-rendered HTML, not client-side JavaScript injection.
+
+---
+
+## llms-txt-directive-md
+
+Whether the markdown version of your documentation pages tells agents where to find `llms.txt`.
+
+|                |                                                                                |
+| -------------- | ------------------------------------------------------------------------------ |
+| **Weight**     | Medium (4)                                                                     |
+| **Depends on** | `markdown-url-support` or `content-negotiation`                                |
+| **Spec**       | [llms-txt-directive-md](https://agentdocsspec.com/spec/#llms-txt-directive-md) |
+
+### Why it matters
+
+Agents that fetch markdown versions of pages (via `.md` URLs or content negotiation) benefit from a directive pointing them to the documentation index. Anthropic's Claude Code documentation includes a blockquote at the top of every markdown page telling agents to fetch the documentation index at `llms.txt`. In practice, agents that encounter this directive may follow it to discover the full documentation index.
+
+This check is skipped if the site doesn't serve markdown (neither `markdown-url-support` nor `content-negotiation` passes).
+
+### Results
+
+| Result | Condition                                                                          |
+| ------ | ---------------------------------------------------------------------------------- |
+| Pass   | Directive found in markdown of all or nearly all documentation pages, near the top |
+| Warn   | Found on some pages but missing from others, or buried past 50% of content         |
+| Fail   | Not detected in the markdown of any tested page                                    |
+
+### How to fix
+
+Add a blockquote near the top of each markdown page pointing to your `llms.txt`. For example:
 
 ```markdown
 > For the complete documentation index, see [llms.txt](/llms.txt)
@@ -231,8 +261,6 @@ Add a blockquote near the top of each documentation page pointing to your `llms.
 
 The URL in the directive should match wherever you placed your `llms.txt`. If it's at `/docs/llms.txt`, use that path instead.
 
-This can typically be added through your docs platform's page template or layout component. It can be visually hidden with CSS while remaining accessible to agents, as long as it's in the server-rendered HTML (not injected by client-side JavaScript).
-
 ### Score impact
 
-This check is one of the signals used by the [discovery coefficient](/agent-score-calculation#discovery-coefficient). If neither this check nor content negotiation passes, downstream markdown quality checks are discounted because agents can't find the markdown path.
+Both directive checks (`llms-txt-directive-html` and `llms-txt-directive-md`) are signals used by the [discovery coefficient](/agent-score-calculation#discovery-coefficient). If neither directive check nor content negotiation passes, downstream markdown quality checks are discounted because agents can't find the markdown path.
diff --git a/docs/checks/index.md b/docs/checks/index.md
index 7af8b4b..0a0f11b 100644
--- a/docs/checks/index.md
+++ b/docs/checks/index.md
@@ -1,6 +1,6 @@
 # Checks Reference
 
-AFDocs runs 22 checks across 7 categories. Each check implements a section of the [Agent-Friendly Documentation Spec](https://agentdocsspec.com/spec), which documents the observed agent behaviors and failure modes that motivated the check.
+AFDocs runs 23 checks across 7 categories. Each check implements a section of the [Agent-Friendly Documentation Spec](https://agentdocsspec.com/spec), which documents the observed agent behaviors and failure modes that motivated the check.
 
 ## Categories
 
diff --git a/docs/ci-integration.md b/docs/ci-integration.md
index 95443ad..355db76 100644
--- a/docs/ci-integration.md
+++ b/docs/ci-integration.md
@@ -69,7 +69,7 @@ Checks not in the list show as skipped in the test output.
 ```yaml
 url: https://docs.example.com
 
-# Optional: run only specific checks (omit to run all 22)
+# Optional: run only specific checks (omit to run all 23)
 # checks:
 #   - llms-txt-exists
 #   - llms-txt-valid
diff --git a/docs/improve-your-score.md b/docs/improve-your-score.md
index 05d68cd..2227649 100644
--- a/docs/improve-your-score.md
+++ b/docs/improve-your-score.md
@@ -28,7 +28,7 @@ The scorecard tells you _what's wrong_. The verbose text output tells you _where
 
 ## Step 3: Work through fixes iteratively
 
-You don't need to run all 22 checks every time you make a change. Target just the check you're fixing for fast feedback:
+You don't need to run all 23 checks every time you make a change. Target just the check you're fixing for fast feedback:
 
 ```bash
 # Iterate on llms.txt
@@ -47,7 +47,8 @@ checks:
   - llms-txt-valid
   - llms-txt-size
   - llms-txt-links-resolve
-  - llms-txt-directive
+  - llms-txt-directive-html
+  - llms-txt-directive-md
   - rendering-strategy
   - page-size-html
   - content-start-position
@@ -87,9 +88,9 @@ At 50%+ gated pages, the score is [capped at D](/agent-score-calculation#score-c
 
 If `markdown-url-support` fails, agents are stuck with HTML. Many docs platforms support this natively (VitePress, for example, serves markdown at `.md` URLs out of the box). Others need a server configuration change.
 
-**Add an llms.txt directive to pages**
+**Add an llms.txt directive to HTML pages**
 
-If `llms-txt-directive` fails, agents visiting individual pages have no way to discover your llms.txt. Add a blockquote directive near the top of each page, typically through your docs platform's page template or layout component.
+If `llms-txt-directive-html` fails, agents visiting individual pages have no way to discover your llms.txt. Add a visually-hidden element near the top of each page pointing to your llms.txt. If your site serves markdown, mention that in the directive too so agents know to request it.
 
 **Fix broken llms.txt links**
 
@@ -107,6 +108,7 @@ If `llms-txt-size` warns or fails, agents are seeing a truncated version of your
 
 These are worth addressing but won't move the score as dramatically:
 
+- **llms.txt directive in markdown** (`llms-txt-directive-md`): Add a blockquote near the top of each markdown page pointing to your llms.txt.
 - **Content negotiation** (`content-negotiation`): Return markdown when agents send `Accept: text/markdown`. Requires server-side support.
 - **Content start position** (`content-start-position`): Reduce boilerplate (inline CSS/JS, navigation markup) before the main content. Move styles and scripts to external files.
 - **Tabbed content** (`tabbed-content-serialization`): If tabbed UI components create oversized output, consider restructuring into separate pages or using query params to retrieve only specific tab versions.
diff --git a/docs/index.md b/docs/index.md
index 0426a5c..2193e33 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -17,7 +17,7 @@ hero:
       link: /quick-start
 
 features:
-  - title: 22 Checks, 7 Categories
+  - title: 23 Checks, 7 Categories
     details: From content discoverability to markdown availability, AFDocs tests everything that affects how agents interact with your docs.
   - title: Actionable Fixes
     details: Every failing check comes with a specific fix suggestion, so you know exactly what to change.
diff --git a/docs/interaction-diagnostics.md b/docs/interaction-diagnostics.md
index 10a6227..186b4e3 100644
--- a/docs/interaction-diagnostics.md
+++ b/docs/interaction-diagnostics.md
@@ -6,14 +6,24 @@ These diagnostics appear in the "Interaction Diagnostics" section of the `--form
 
 ## Markdown support is undiscoverable
 
-**Triggers when** your site serves markdown at `.md` URLs, but none of the discovery mechanisms exist: no content negotiation, no llms.txt directive on pages, and no `.md` links in llms.txt.
+**Triggers when** your site serves markdown at `.md` URLs, but there is no agent-facing directive on HTML pages pointing to llms.txt and the server does not support content negotiation.
 
 **What it means**: You've done the work to support markdown, but agents have no way to find out. They'll default to the HTML path every time. In observed agent behavior, agents do not independently discover `.md` URL variants; they need to be told.
 
-**What to do**: Add a [directive](/checks/content-discoverability#llms-txt-directive) on your docs pages pointing to llms.txt, or implement [content negotiation](/checks/markdown-availability#content-negotiation) for `Accept: text/markdown`. Either change makes your existing markdown support visible to agents.
+**What to do**: Add a [directive](/checks/content-discoverability#llms-txt-directive-html) on your docs pages pointing to llms.txt, and implement [content negotiation](/checks/markdown-availability#content-negotiation) for `Accept: text/markdown`. The directive is the primary discovery mechanism because it reaches all agents; content negotiation provides a fast path for agents that request markdown by default. Both are recommended.
 
 **Score impact**: Markdown quality checks (`page-size-markdown`, `markdown-code-fence-validity`, `markdown-content-parity`) are excluded from the score entirely when this diagnostic fires, because their results don't reflect real agent experience.
 
+## Markdown support is only partially discoverable
+
+**Triggers when** your site serves markdown at `.md` URLs and supports content negotiation, but there is no agent-facing directive on HTML pages pointing to llms.txt.
+
+**What it means**: Agents that send `Accept: text/markdown` (Claude Code, Cursor, OpenCode) get markdown automatically, but the majority of agents fetch HTML by default and have no signal that a markdown path exists. Your markdown support benefits a subset of agents but not most of them.
+
+**What to do**: Add a [directive](/checks/content-discoverability#llms-txt-directive-html) near the top of each HTML page pointing to your llms.txt. Because your site already serves markdown, mention that in the directive too so agents know to request it. The directive reaches all agents, not just the ones that request markdown by default.
+
+**Score impact**: Same as the undiscoverable case: markdown quality checks are excluded from the score because most agents still can't find the markdown path.
+
 ## Truncated index
 
 **Triggers when** your llms.txt exists but exceeds agent context limits (the `llms-txt-size` check warns or fails).
diff --git a/docs/quick-start.md b/docs/quick-start.md
index 7891c9c..9ff601f 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -12,7 +12,7 @@ No install needed. Point AFDocs at your documentation site:
 npx afdocs check https://docs.example.com --format scorecard
 ```
 
-This discovers pages from your site (via llms.txt, sitemap, or both), samples up to 50, runs all 22 checks, and produces a scorecard with your overall score, per-category breakdowns, and fix suggestions:
+This discovers pages from your site (via llms.txt, sitemap, or both), samples up to 50, runs all 23 checks, and produces a scorecard with your overall score, per-category breakdowns, and fix suggestions:
 
 ```
 Agent-Friendly Docs Scorecard
@@ -35,7 +35,7 @@ Agent-Friendly Docs Scorecard
     Content Discoverability
       PASS  llms-txt-exists        llms.txt found at /llms.txt
       WARN  llms-txt-size          llms.txt is 65,000 characters
-      FAIL  llms-txt-directive     No directive detected on any tested page
+      FAIL  llms-txt-directive-html No directive detected in HTML of any tested page
             Fix: Add a blockquote near the top of each page ...
 ```
 
@@ -71,7 +71,7 @@ npx afdocs check https://docs.example.com --format json --score
 
 ## Run specific checks
 
-If you're working on a particular issue, you don't need to run all 22 checks every time. Pass a comma-separated list of check IDs:
+If you're working on a particular issue, you don't need to run all 23 checks every time. Pass a comma-separated list of check IDs:
 
 ```bash
 npx afdocs check https://docs.example.com --checks llms-txt-exists,llms-txt-valid,llms-txt-size
diff --git a/docs/reference/config-file.md b/docs/reference/config-file.md
index a8addf8..f31f28e 100644
--- a/docs/reference/config-file.md
+++ b/docs/reference/config-file.md
@@ -59,7 +59,7 @@ The documentation site URL to check. This is the only required field.
 
 ### `checks` (optional)
 
-A list of check IDs to run. If omitted, all 22 checks run. Use this to focus on checks that are actionable for your platform. See the [Checks Reference](/checks/) for the full list of check IDs.
+A list of check IDs to run. If omitted, all 23 checks run. Use this to focus on checks that are actionable for your platform. See the [Checks Reference](/checks/) for the full list of check IDs.
 
 This is particularly useful when your docs platform doesn't support certain capabilities. For example, if you can't serve markdown, exclude the markdown-related checks so your score reflects what you can control. See [Improve Your Score](/improve-your-score#step-3-work-through-fixes-iteratively) for more on this approach.
 
diff --git a/docs/reference/scoring-api.md b/docs/reference/scoring-api.md
index 2d5e02f..3bcdafe 100644
--- a/docs/reference/scoring-api.md
+++ b/docs/reference/scoring-api.md
@@ -14,7 +14,7 @@ console.log(score.overall); // 72
 console.log(score.grade); // 'C'
 console.log(score.categoryScores); // { 'content-discoverability': { score: 80, grade: 'B' }, ... }
 console.log(score.diagnostics); // [{ id: 'markdown-undiscoverable', severity: 'warning', ... }]
-console.log(score.resolutions); // { 'llms-txt-directive': 'Add a blockquote near the top...' }
+console.log(score.resolutions); // { 'llms-txt-directive-html': 'Add a visually-hidden element...' }
 ```
 
 `computeScore` is a pure function. It does not modify the report or make any network requests. Composition is the consumer's responsibility: the CLI formatters compose `runChecks` and `computeScore`; external consumers call them separately.
diff --git a/docs/what-is-agent-score.md b/docs/what-is-agent-score.md
index 758c38c..43fcaad 100644
--- a/docs/what-is-agent-score.md
+++ b/docs/what-is-agent-score.md
@@ -1,6 +1,6 @@
 # What Is the Agent Score?
 
-The Agent Score is a 0–100 rating of how well AI coding agents can discover, navigate, and consume your documentation site. You can get your score with [AFDocs](https://www.npmjs.com/package/afdocs), which runs 22 automated checks based on the [Agent-Friendly Documentation Spec](https://agentdocsspec.com) and maps the results to a letter grade.
+The Agent Score is a 0–100 rating of how well AI coding agents can discover, navigate, and consume your documentation site. You can get your score with [AFDocs](https://www.npmjs.com/package/afdocs), which runs 23 automated checks based on the [Agent-Friendly Documentation Spec](https://agentdocsspec.com) and maps the results to a letter grade.
 
 The AI coding agents that regularly consume your documentation while helping developers perform tasks include:
 
@@ -25,7 +25,7 @@ For the full business case, including how agents fail on documentation, the conc
 
 ## What the score measures
 
-The 22 checks cover seven categories:
+The 23 checks cover seven categories:
 
 | Category                                                   | What it tests                                                                                      |
 | ---------------------------------------------------------- | -------------------------------------------------------------------------------------------------- |
@@ -75,7 +75,7 @@ Agent-Friendly Docs Scorecard
     Content Discoverability
       PASS  llms-txt-exists        llms.txt found at /llms.txt
       WARN  llms-txt-size          llms.txt is 65,000 characters
-      FAIL  llms-txt-directive     No directive detected on any tested page
+      FAIL  llms-txt-directive-html No directive detected in HTML of any tested page
             Fix: Add a blockquote near the top of each page ...
 ```
 
diff --git a/scoring-reference.md b/scoring-reference.md
index 1618daa..7cd3c52 100644
--- a/scoring-reference.md
+++ b/scoring-reference.md
@@ -55,7 +55,8 @@ and the empirical evidence sections in each check definition.
 | `page-size-markdown`           | High     | 7      | Direct truncation risk on the best-case content path.                                                                                                 |
 | `page-size-html`               | High     | 7      | Affects the majority of agents, which receive HTML.                                                                                                   |
 | `http-status-codes`            | High     | 7      | Soft 404s actively mislead agents into extracting info from error pages.                                                                              |
-| `llms-txt-directive`           | High     | 7      | Discoverability multiplier. Amplifies value of llms.txt and markdown support.                                                                         |
+| `llms-txt-directive-html`      | High     | 7      | Discoverability multiplier for HTML path. Tells agents about llms.txt.                                                                                |
+| `llms-txt-directive-md`        | Medium   | 4      | Discoverability multiplier for markdown path. Tells agents about llms.txt.                                                                            |
 | `llms-txt-valid`               | Medium   | 4      | Structure helps parsing, but even non-standard llms.txt with links is useful.                                                                         |
 | `content-negotiation`          | Medium   | 4      | Only some agents send Accept: text/markdown. Valuable but not universal.                                                                              |
 | `content-start-position`       | Medium   | 4      | Boilerplate preamble on HTML path wastes truncation budget.                                                                                           |
@@ -111,7 +112,8 @@ Each check has a specific warn coefficient rather than a uniform default.
 | `llms-txt-coverage`                                          | 0.75       | 80-95% of pages covered. Most of the site is represented in the index.                                                                                                                       |
 | `markdown-content-parity`                                    | 0.75       | Minor formatting differences, not substantive content drift.                                                                                                                                 |
 | **0.60: Partial coverage or platform-dependent**             |            |                                                                                                                                                                                              |
-| `llms-txt-directive`                                         | 0.60       | Present on some pages but not others. Agents that land on covered pages benefit; others get no guidance.                                                                                     |
+| `llms-txt-directive-html`                                    | 0.60       | Present in HTML of some pages but not others. Agents that land on covered pages benefit; others get no guidance.                                                                             |
+| `llms-txt-directive-md`                                      | 0.60       | Present in markdown of some pages but not others.                                                                                                                                            |
 | `redirect-behavior`                                          | 0.60       | Cross-host HTTP redirects: some agents follow them, some don't. Platform-dependent outcome.                                                                                                  |
 | **0.25: Actively steering agents away from the better path** |            |                                                                                                                                                                                              |
 | `llms-txt-links-markdown`                                    | 0.25       | Markdown variants exist but llms.txt links to HTML. The one place you control agent navigation actively directs agents away from markdown. Agents don't independently discover .md variants. |
@@ -158,7 +160,8 @@ these fields from `details`:
 | `section-header-quality`       | `pageResults` array, count per-status                                     |
 | `http-status-codes`            | `pageResults` array, count per-status                                     |
 | `redirect-behavior`            | `pageResults` array, count per-status                                     |
-| `llms-txt-directive`           | `pageResults` array, count per-status                                     |
+| `llms-txt-directive-html`      | `pageResults` array, count per-status                                     |
+| `llms-txt-directive-md`        | `pageResults` array, count per-status                                     |
 | `cache-header-hygiene`         | `passBucket`, `warnBucket`, `failBucket`                                  |
 | `markdown-content-parity`      | `passBucket`, `warnBucket`, `failBucket`                                  |
 | `auth-gate-detection`          | `pageResults` array, count per-status                                     |
@@ -276,15 +279,16 @@ markdown delivers zero value to agents today.
 
 `markdown-url-support` is excluded from this coefficient because it measures
 whether the capability exists, not the quality of an established path. A site
-should get credit for serving markdown (and the `markdown-undiscoverable`
-diagnostic tells them to make it discoverable), but the downstream quality
+should get credit for serving markdown (and the `markdown-undiscoverable` or
+`markdown-partially-discoverable` diagnostic tells them to make it
+discoverable), but the downstream quality
 checks only matter if agents actually reach the markdown.
 
 ```
 discovery_coefficient:
   content-negotiation pass      -> 1.0  (mechanical; no agent decision involved)
-  llms-txt-directive pass       -> 0.8  (effective but agents sometimes ignore
-                                         the directive even when present)
+  llms-txt-directive-html pass  -> 0.8  (effective but agents sometimes ignore
+  OR llms-txt-directive-md pass          the directive even when present)
   llms-txt-links-markdown pass  -> 0.5  (requires finding llms.txt first,
                                          then following .md links from it)
   none of the above             -> 0.0  (agents won't find the markdown path)
@@ -393,26 +397,43 @@ Each diagnostic has:
 - **Resolution**: What to do about it
 
 Some diagnostics reference the trigger state of other diagnostics (e.g.,
-`page-size-no-markdown-escape` references whether markdown is undiscoverable).
-The implementation must evaluate diagnostics in dependency order:
-`markdown-undiscoverable` first, then diagnostics that reference it.
+`page-size-no-markdown-escape` references whether markdown is undiscoverable
+or only partially discoverable). The implementation must evaluate diagnostics
+in dependency order: `markdown-undiscoverable` and
+`markdown-partially-discoverable` first, then diagnostics that reference them.
 
 ### Diagnostic Definitions
 
 #### `markdown-undiscoverable`
 
 - **Severity**: warning
-- **Triggers when**: `markdown-url-support` passes, AND all of
-  (`content-negotiation`, `llms-txt-directive`, `llms-txt-links-markdown`)
-  are not pass.
+- **Triggers when**: `markdown-url-support` passes, AND
+  `content-negotiation` does not pass, AND `llms-txt-directive-html` does
+  not pass.
 - **Message**: Your site serves markdown at .md URLs, but agents have no way
-  to discover this. Without content negotiation, an llms.txt directive on your
-  pages, or .md links in your llms.txt, most agents will default to the HTML
-  path. Your markdown support is not being utilized.
-- **Resolution**: Add a blockquote directive near the top of each docs page
-  pointing to your llms.txt, or implement content negotiation for
-  `Accept: text/markdown`. Either change makes your existing markdown support
-  discoverable.
+  to discover this. No agent-facing directive points to your llms.txt, and
+  the server does not support content negotiation. Most agents will default
+  to the HTML path and never benefit from your markdown support.
+- **Resolution**: Add a directive near the top of each docs page pointing to
+  your llms.txt, and implement content negotiation for `Accept: text/markdown`.
+  The directive is the primary discovery mechanism (it reaches all agents);
+  content negotiation provides a fast path for agents that request markdown
+  by default.
+
+#### `markdown-partially-discoverable`
+
+- **Severity**: warning
+- **Triggers when**: `markdown-url-support` passes, AND
+  `content-negotiation` passes, AND `llms-txt-directive-html` does not pass.
+- **Message**: Your site serves markdown and supports content negotiation,
+  but has no agent-facing directive on HTML pages pointing to llms.txt.
+  Agents that send Accept: text/markdown (Claude Code, Cursor, OpenCode) get
+  markdown automatically, but the majority of agents fetch HTML by default
+  and have no signal to try the markdown path.
+- **Resolution**: Add a directive near the top of each docs page pointing to
+  your llms.txt. If your site serves markdown, mention that in the directive
+  too. The directive reaches all agents, not just the ones that request
+  markdown by default.
 
 #### `truncated-index`
 
@@ -448,8 +469,8 @@ The implementation must evaluate diagnostics in dependency order:
 - **Triggers when**: (`llms-txt-exists` fails OR (`llms-txt-exists` passes
   AND `llms-txt-links-resolve` resolveRate < 10%)) AND
   (`rendering-strategy` fails OR `rendering-strategy` not run) AND
-  (`markdown-url-support` fails OR markdown is undiscoverable per
-  `markdown-undiscoverable` trigger).
+  (`markdown-url-support` fails OR `markdown-undiscoverable` triggered OR
+  `markdown-partially-discoverable` triggered).
 
   The expanded llms.txt condition recognizes that an llms.txt where <10% of
   links resolve is functionally equivalent to having no llms.txt: agents
@@ -485,7 +506,8 @@ The implementation must evaluate diagnostics in dependency order:
 
 - **Severity**: warning
 - **Triggers when**: `page-size-html` fails AND (`markdown-url-support` fails
-  OR markdown is undiscoverable).
+  OR `markdown-undiscoverable` triggered OR
+  `markdown-partially-discoverable` triggered).
 - **Message**: {n} pages exceed agent truncation limits on the HTML path, and
   there is no discoverable markdown path for agents to get smaller
   representations. Agents will silently receive truncated content on these
@@ -529,9 +551,9 @@ Agent-Friendly Docs Scorecard
   Interaction Diagnostics:
     [!] Markdown support is undiscoverable
         Your site serves markdown at .md URLs, but agents have no way to
-        discover this. Without content negotiation, an llms.txt directive
-        on your pages, or .md links in your llms.txt, most agents will
-        default to the HTML path.
+        discover this. No agent-facing directive points to your llms.txt,
+        and the server does not support content negotiation. Most agents
+        will default to the HTML path.
 
         Fix: Add a blockquote directive near the top of each docs page
         pointing to your llms.txt, or implement content negotiation for
@@ -553,8 +575,8 @@ Agent-Friendly Docs Scorecard
       PASS  llms-txt-links-resolve All links resolve
       FAIL  llms-txt-links-markdown Links point to HTML, not markdown
             Fix: Update links to use .md URL variants ...
-      FAIL  llms-txt-directive     No directive detected on any tested page
-            Fix: Add a blockquote near the top of each page pointing to ...
+      FAIL  llms-txt-directive-html No directive detected in HTML of any tested page
+            Fix: Add a visually-hidden element near the top of each page ...
 
     ...
 ```
diff --git a/src/checks/content-discoverability/llms-txt-directive-html.ts b/src/checks/content-discoverability/llms-txt-directive-html.ts
new file mode 100644
index 0000000..c3aa701
--- /dev/null
+++ b/src/checks/content-discoverability/llms-txt-directive-html.ts
@@ -0,0 +1,193 @@
+import { registerCheck } from '../registry.js';
+import { discoverAndSamplePages } from '../../helpers/get-page-urls.js';
+import { toHtmlUrl } from '../../helpers/to-md-urls.js';
+import type { CheckContext, CheckResult } from '../../types.js';
+
+interface DirectiveResult {
+  url: string; // HTML URL of the page (the sampled URL when the fetch threw)
+  found: boolean; // true when an llms.txt link or text mention was located
+  position?: number; // character offset of the match within the searched body
+  positionPercent?: number; // position / searched length: a 0-1 fraction, not 0-100
+  matchText?: string; // matched text; link matches are cut to 200 characters
+  error?: string; // HTTP status or exception message; such pages count as untested
+}
+
+/**
+ * Links whose href points to an actual llms.txt file (path ends with /llms.txt).
+ * Excludes links to pages *about* llms.txt (e.g. /docs/ai/llmstxt).
+ */
+const LINK_PATTERN =
+  /<a\s[^>]*href\s*=\s*["']([^"']*\/llms\.txt(?:[?#][^"']*)?)["'][^>]*>[\s\S]*?<\/a>/gi;
+
+/**
+ * Plain text mentions of "llms.txt" in body content (after nav/script/style
+ * are stripped). Catches visually-hidden directives and text-only directives.
+ */
+const TEXT_PATTERN = /llms\.txt/gi;
+
+const TOP_THRESHOLD = 0.1; // positionPercent <= this counts as "near the top"
+const DEEP_THRESHOLD = 0.5; // positionPercent > this counts as "buried deep"
+
+/**
+ * Return the part of an HTML document that is searched for a directive: the
+ * content between `<body ...>` and `</body>` (the whole input when there is no
+ * `<body>` tag), minus <nav>, <script> (which covers JSON-LD) and <style>
+ * elements. Only exact tag names are stripped (`<nav-bar>` is kept), and a
+ * closing tag may carry whitespace before `>` (`</nav >`), as HTML permits.
+ */
+function extractSearchableBody(html: string): string {
+  let body = html;
+  const openMatch = /<body[\s>]/i.exec(html);
+  if (openMatch) {
+    // Skip past the end of the opening tag, attributes included.
+    const bodyStart = html.indexOf('>', openMatch.index + openMatch[0].length - 1) + 1;
+    const closeMatch = /<\/body\s*>/i.exec(html.slice(bodyStart));
+    body = html.slice(bodyStart, closeMatch ? bodyStart + closeMatch.index : html.length);
+  }
+  return body
+    .replace(/<nav(?=[\s/>])[\s\S]*?<\/nav\s*>/gi, '')
+    .replace(/<script(?=[\s/>])[\s\S]*?<\/script\s*>/gi, '')
+    .replace(/<style(?=[\s/>])[\s\S]*?<\/style\s*>/gi, '');
+}
+
+/** Locate the first llms.txt link in `content`, falling back to the first text mention. */
+function searchHtmlContent(content: string): { position: number; matchText: string } | null {
+  // The patterns are global (stateful), so rewind each one before calling exec.
+  LINK_PATTERN.lastIndex = 0;
+  const anchor = LINK_PATTERN.exec(content);
+  if (anchor !== null) {
+    // Report at most 200 characters of the matched anchor markup.
+    const matchText = anchor[0].slice(0, 200);
+    return { position: anchor.index, matchText };
+  }
+  // No qualifying link: accept a bare "llms.txt" mention instead.
+  TEXT_PATTERN.lastIndex = 0;
+  const mention = TEXT_PATTERN.exec(content);
+  return mention === null ? null : { position: mention.index, matchText: mention[0] };
+}
+
+async function check(ctx: CheckContext): Promise<CheckResult> {
+  const id = 'llms-txt-directive-html';
+  const category = 'content-discoverability';
+  // Looks for an llms.txt directive in the HTML of each sampled page, then grades the outcome.
+  const { urls: pageUrls, totalPages, sampled, warnings } = await discoverAndSamplePages(ctx);
+
+  const results: DirectiveResult[] = [];
+  const concurrency = ctx.options.maxConcurrency;
+  // Pages are fetched in batches of `concurrency`; each batch settles before the next starts.
+  for (let i = 0; i < pageUrls.length; i += concurrency) {
+    const batch = pageUrls.slice(i, i + concurrency);
+    const batchResults = await Promise.all(
+      batch.map(async (url): Promise<DirectiveResult> => {
+        try {
+          const htmlUrl = toHtmlUrl(url);
+          const response = await ctx.http.fetch(htmlUrl);
+          if (!response.ok) {
+            return { url: htmlUrl, found: false, error: `HTTP ${response.status}` };
+          }
+
+          const contentType = response.headers.get('content-type') ?? '';
+          const text = await response.text();
+          const isHtml = contentType.includes('text/html') || text.trimStart().startsWith('<');
+          // A non-HTML response still counts as a tested page, just without a directive.
+          if (!isHtml) {
+            return { url: htmlUrl, found: false };
+          }
+          // Search only the body, after <nav>, <script> and <style> have been removed.
+          const searchable = extractSearchableBody(text);
+          const hit = searchHtmlContent(searchable);
+          if (hit) {
+            const positionPercent = searchable.length > 0 ? hit.position / searchable.length : 0;
+            return {
+              url: htmlUrl,
+              found: true,
+              position: hit.position,
+              positionPercent,
+              matchText: hit.matchText,
+            };
+          }
+
+          return { url: htmlUrl, found: false };
+        } catch (err) {
+          return {
+            url, // htmlUrl is scoped to the try block; report the sampled URL instead
+            found: false,
+            error: err instanceof Error ? err.message : String(err),
+          };
+        }
+      }),
+    );
+    results.push(...batchResults);
+  }
+  // Results carrying an error are excluded from "tested"; they always have found === false.
+  const tested = results.filter((r) => !r.error);
+  const fetchErrors = results.filter((r) => r.error).length;
+  const found = results.filter((r) => r.found);
+  const notFound = tested.filter((r) => !r.found);
+
+  if (tested.length === 0) {
+    return {
+      id,
+      category,
+      status: 'fail',
+      message: `Could not test any pages${fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : ''}`,
+      details: {
+        totalPages,
+        testedPages: results.length, // attempted pages here, since none were tested
+        sampled,
+        fetchErrors,
+        pageResults: results,
+        discoveryWarnings: warnings,
+      },
+    };
+  }
+  // Bucket found directives by relative position; 10-50% falls in neither bucket.
+  const nearTop = found.filter((r) => (r.positionPercent ?? 1) <= TOP_THRESHOLD);
+  const buried = found.filter((r) => (r.positionPercent ?? 0) > DEEP_THRESHOLD);
+
+  let status: 'pass' | 'warn' | 'fail';
+  let message: string;
+  const pageLabel = sampled ? 'sampled pages' : 'pages';
+  const suffix = fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '';
+  // Precedence: none found -> fail; one buried and none near top -> warn; any missing -> warn.
+  if (found.length === 0) {
+    status = 'fail';
+    message = `No llms.txt directive found in HTML of any of ${tested.length} ${pageLabel}${suffix}`;
+  } else if (buried.length > 0 && nearTop.length === 0) {
+    status = 'warn';
+    message = `llms.txt directive found in HTML of ${found.length} of ${tested.length} ${pageLabel}, but buried deep in the page (past ${Math.round(DEEP_THRESHOLD * 100)}%)${suffix}`;
+  } else if (notFound.length > 0) {
+    status = 'warn';
+    message = `llms.txt directive found in HTML of ${found.length} of ${tested.length} ${pageLabel} (${notFound.length} missing)${suffix}`;
+  } else {
+    status = 'pass';
+    message = `llms.txt directive found in HTML of all ${tested.length} ${pageLabel}${nearTop.length > 0 ? ', near the top of content' : ''}${suffix}`;
+  }
+
+  return {
+    id,
+    category,
+    status,
+    message,
+    details: {
+      totalPages,
+      testedPages: tested.length,
+      sampled,
+      foundCount: found.length,
+      notFoundCount: notFound.length,
+      nearTopCount: nearTop.length,
+      buriedCount: buried.length,
+      fetchErrors,
+      pageResults: results,
+      discoveryWarnings: warnings,
+    },
+  };
+}
+
+registerCheck({
+  id: 'llms-txt-directive-html',
+  category: 'content-discoverability',
+  description: 'Whether HTML pages include a directive pointing to llms.txt',
+  dependsOn: [], // no prerequisite checks declared
+  run: check,
+});
diff --git a/src/checks/content-discoverability/llms-txt-directive-md.ts b/src/checks/content-discoverability/llms-txt-directive-md.ts
new file mode 100644
index 0000000..d7b59ac
--- /dev/null
+++ b/src/checks/content-discoverability/llms-txt-directive-md.ts
@@ -0,0 +1,202 @@
+import { registerCheck } from '../registry.js';
+import { looksLikeMarkdown } from '../../helpers/detect-markdown.js';
+import { discoverAndSamplePages } from '../../helpers/get-page-urls.js';
+import { toMdUrls, toHtmlUrl } from '../../helpers/to-md-urls.js';
+import type { CheckContext, CheckResult } from '../../types.js';
+
+interface DirectiveResult {
+  url: string; // the sampled page URL as given to the check
+  found: boolean; // true when "llms.txt" is mentioned in the page's markdown
+  /** The URL that provided the markdown content (may be a .md candidate). */
+  mdUrl?: string;
+  position?: number; // character offset of the first mention in the markdown
+  positionPercent?: number; // position / markdown length: a 0-1 fraction, not 0-100
+  matchText?: string; // the matched text
+  error?: string; // set when no markdown was available or evaluation threw
+}
+
+const DIRECTIVE_PATTERN = /llms\.txt/gi; // any case-insensitive mention of "llms.txt"
+
+const TOP_THRESHOLD = 0.1; // positionPercent <= this counts as "near the top"
+const DEEP_THRESHOLD = 0.5; // positionPercent > this counts as "buried deep"
+
+/** First match of `pattern` in `content` (offset plus up to 200 chars of text), or null. */
+function searchContent(
+  content: string,
+  pattern: RegExp,
+): { position: number; matchText: string } | null {
+  pattern.lastIndex = 0; // global/sticky regexes resume from lastIndex, so rewind first
+  const found = pattern.exec(content);
+  return found === null ? null : { position: found.index, matchText: found[0].slice(0, 200) };
+}
+
+/** Build the per-page result for markdown `content` that was served from `mdUrl`. */
+function evaluateMarkdown(pageUrl: string, content: string, mdUrl: string): DirectiveResult {
+  const match = searchContent(content, DIRECTIVE_PATTERN);
+  if (!match) return { url: pageUrl, found: false, mdUrl };
+
+  // Relative position is stored as a 0-1 fraction of the markdown length.
+  return {
+    url: pageUrl,
+    found: true,
+    mdUrl,
+    position: match.position,
+    positionPercent: content.length > 0 ? match.position / content.length : 0,
+    matchText: match.matchText,
+  };
+}
+
+/**
+ * Fetch markdown for a page: try each .md URL candidate in order, then fall
+ * back to content negotiation. Resolves to null when neither yields markdown.
+ */
+async function fetchMarkdown(
+  ctx: CheckContext,
+  pageUrl: string,
+): Promise<{ text: string; url: string } | null> {
+  const htmlUrl = toHtmlUrl(pageUrl);
+  const mdCandidates = toMdUrls(htmlUrl);
+  // Candidates are tried one at a time; the first OK response that looks like markdown wins.
+  for (const mdUrl of mdCandidates) {
+    try {
+      const response = await ctx.http.fetch(mdUrl);
+      if (!response.ok) continue;
+      const text = await response.text();
+      if (looksLikeMarkdown(text)) {
+        return { text, url: mdUrl };
+      }
+    } catch {
+      continue; // fetch or body read threw: move on to the next candidate
+    }
+  }
+  // Fallback: request the HTML URL with Accept: text/markdown and go by the content-type.
+  try {
+    const response = await ctx.http.fetch(htmlUrl, {
+      headers: { Accept: 'text/markdown' },
+    });
+    if (response.ok) {
+      const contentType = response.headers.get('content-type') ?? '';
+      if (contentType.includes('text/markdown')) {
+        const text = await response.text();
+        if (text.trim().length > 0) {
+          return { text, url: htmlUrl };
+        }
+      }
+    }
+  } catch {
+    // Content negotiation failed; fall through to the null result
+  }
+
+  return null;
+}
+
+async function check(ctx: CheckContext): Promise<CheckResult> {
+  const id = 'llms-txt-directive-md';
+  const category = 'content-discoverability';
+  // Looks for an llms.txt mention in the markdown version of each sampled page and grades it.
+  const { urls: pageUrls, totalPages, sampled, warnings } = await discoverAndSamplePages(ctx);
+
+  const results: DirectiveResult[] = [];
+  const concurrency = ctx.options.maxConcurrency;
+  // Pages are processed in batches of `concurrency`; each batch settles before the next starts.
+  for (let i = 0; i < pageUrls.length; i += concurrency) {
+    const batch = pageUrls.slice(i, i + concurrency);
+    const batchResults = await Promise.all(
+      batch.map(async (url): Promise<DirectiveResult> => {
+        try {
+          // Read from cache if dependency checks already fetched markdown
+          const cached = ctx.pageCache.get(url);
+          if (cached?.markdown?.content) {
+            return evaluateMarkdown(url, cached.markdown.content, url); // page URL doubles as mdUrl
+          }
+
+          // Not cached; fetch markdown ourselves
+          const md = await fetchMarkdown(ctx, url);
+          if (!md) {
+            return { url, found: false, error: 'No markdown version available' };
+          }
+
+          return evaluateMarkdown(url, md.text, md.url);
+        } catch (err) {
+          return {
+            url,
+            found: false,
+            error: err instanceof Error ? err.message : String(err),
+          };
+        }
+      }),
+    );
+    results.push(...batchResults);
+  }
+  // Pages with an error (including "No markdown version available") are excluded from tested.
+  const tested = results.filter((r) => !r.error);
+  const fetchErrors = results.filter((r) => r.error).length;
+  const found = results.filter((r) => r.found);
+  const notFound = tested.filter((r) => !r.found);
+
+  if (tested.length === 0) {
+    return {
+      id,
+      category,
+      status: 'fail',
+      message: `Could not fetch markdown for any of ${results.length} pages${fetchErrors > 0 ? `; ${fetchErrors} had no markdown version` : ''}`,
+      details: {
+        totalPages,
+        testedPages: results.length, // attempted pages here, since none were tested
+        sampled,
+        fetchErrors,
+        pageResults: results,
+        discoveryWarnings: warnings,
+      },
+    };
+  }
+  // Bucket found directives by relative position; 10-50% falls in neither bucket.
+  const nearTop = found.filter((r) => (r.positionPercent ?? 1) <= TOP_THRESHOLD);
+  const buried = found.filter((r) => (r.positionPercent ?? 0) > DEEP_THRESHOLD);
+
+  let status: 'pass' | 'warn' | 'fail';
+  let message: string;
+  const pageLabel = sampled ? 'sampled pages' : 'pages';
+  const suffix = fetchErrors > 0 ? `; ${fetchErrors} had no markdown version` : '';
+  // Precedence: none found -> fail; one buried and none near top -> warn; any missing -> warn.
+  if (found.length === 0) {
+    status = 'fail';
+    message = `No llms.txt directive found in markdown of any of ${tested.length} ${pageLabel}${suffix}`;
+  } else if (buried.length > 0 && nearTop.length === 0) {
+    status = 'warn';
+    message = `llms.txt directive found in markdown of ${found.length} of ${tested.length} ${pageLabel}, but buried deep in the page (past ${Math.round(DEEP_THRESHOLD * 100)}%)${suffix}`;
+  } else if (notFound.length > 0) {
+    status = 'warn';
+    message = `llms.txt directive found in markdown of ${found.length} of ${tested.length} ${pageLabel} (${notFound.length} missing)${suffix}`;
+  } else {
+    status = 'pass';
+    message = `llms.txt directive found in markdown of all ${tested.length} ${pageLabel}${nearTop.length > 0 ? ', near the top of content' : ''}${suffix}`;
+  }
+
+  return {
+    id,
+    category,
+    status,
+    message,
+    details: {
+      totalPages,
+      testedPages: tested.length,
+      sampled,
+      foundCount: found.length,
+      notFoundCount: notFound.length,
+      nearTopCount: nearTop.length,
+      buriedCount: buried.length,
+      fetchErrors,
+      pageResults: results,
+      discoveryWarnings: warnings,
+    },
+  };
+}
+
+registerCheck({
+  id: 'llms-txt-directive-md',
+  category: 'content-discoverability',
+  description: 'Whether markdown pages include a directive pointing to llms.txt',
+  dependsOn: [['markdown-url-support', 'content-negotiation']], // presumably any-of; verify
+  run: check,
+});
diff --git a/src/checks/content-discoverability/llms-txt-directive.ts b/src/checks/content-discoverability/llms-txt-directive.ts
deleted file mode 100644
index 2abf214..0000000
--- a/src/checks/content-discoverability/llms-txt-directive.ts
+++ /dev/null
@@ -1,233 +0,0 @@
-import { registerCheck } from '../registry.js';
-import { discoverAndSamplePages } from '../../helpers/get-page-urls.js';
-import { toHtmlUrl } from '../../helpers/to-md-urls.js';
-import type { CheckContext, CheckResult } from '../../types.js';
-
-interface DirectiveResult {
-  url: string;
-  found: boolean;
-  /** Where the directive was found: 'html', 'markdown', or undefined if not found. */
-  source?: 'html' | 'markdown';
-  /** Character position of the directive in the content, if found. */
-  position?: number;
-  /** Position as a percentage of total content length. */
-  positionPercent?: number;
-  /** The matched directive text (trimmed for display). */
-  matchText?: string;
-  error?: string;
-}
-
-/**
- * Patterns that indicate an agent-facing directive pointing to llms.txt.
- *
- * HTML pattern matches:
- * - Links whose href contains "llms.txt"
- * - Text mentioning "llms.txt" in prose
- *
- * Markdown pattern matches:
- * - Markdown links to llms.txt (e.g., [index](/llms.txt))
- * - Plain text mentioning "llms.txt"
- */
-const HTML_DIRECTIVE_PATTERN =
-  /(?:<a\s[^>]*href\s*=\s*["'][^"']*llms\.txt[^"']*["'][^>]*>[\s\S]*?<\/a>|llms\.txt)/gi;
-
-const MARKDOWN_DIRECTIVE_PATTERN = /llms\.txt/gi;
-
-/** Percentage threshold: directive in the first 10% is "near the top". */
-const TOP_THRESHOLD = 0.1;
-/** Percentage threshold: directive past 50% is "buried deep". */
-const DEEP_THRESHOLD = 0.5;
-
-/**
- * Extract the HTML body content (between <body> and </body>), or fall
- * back to the full HTML if no body tags are found.
- */
-function extractBody(html: string): { body: string; offset: number } {
-  const openMatch = /<body[\s>]/i.exec(html);
-  if (!openMatch) return { body: html, offset: 0 };
-
-  const bodyStart = html.indexOf('>', openMatch.index + openMatch[0].length - 1) + 1;
-  const closeMatch = /<\/body\s*>/i.exec(html.slice(bodyStart));
-  const bodyEnd = closeMatch ? bodyStart + closeMatch.index : html.length;
-
-  return { body: html.slice(bodyStart, bodyEnd), offset: bodyStart };
-}
-
-function searchContent(
-  content: string,
-  pattern: RegExp,
-): { position: number; matchText: string } | null {
-  const match = pattern.exec(content);
-  pattern.lastIndex = 0;
-  if (!match) return null;
-  return { position: match.index, matchText: match[0].slice(0, 200) };
-}
-
-async function check(ctx: CheckContext): Promise<CheckResult> {
-  const id = 'llms-txt-directive';
-  const category = 'content-discoverability';
-
-  const { urls: pageUrls, totalPages, sampled, warnings } = await discoverAndSamplePages(ctx);
-
-  const results: DirectiveResult[] = [];
-  const concurrency = ctx.options.maxConcurrency;
-
-  for (let i = 0; i < pageUrls.length; i += concurrency) {
-    const batch = pageUrls.slice(i, i + concurrency);
-    const batchResults = await Promise.all(
-      batch.map(async (url): Promise<DirectiveResult> => {
-        try {
-          // Try the HTML version of the page first
-          const htmlUrl = toHtmlUrl(url);
-          const response = await ctx.http.fetch(htmlUrl);
-          if (!response.ok) {
-            return { url: htmlUrl, found: false, error: `HTTP ${response.status}` };
-          }
-
-          const contentType = response.headers.get('content-type') ?? '';
-          const text = await response.text();
-
-          // Determine if we got HTML or markdown
-          const isHtml = contentType.includes('text/html') || text.trimStart().startsWith('<');
-
-          if (isHtml) {
-            const { body } = extractBody(text);
-            const hit = searchContent(body, HTML_DIRECTIVE_PATTERN);
-            if (hit) {
-              const positionPercent = body.length > 0 ? hit.position / body.length : 0;
-              return {
-                url: htmlUrl,
-                found: true,
-                source: 'html',
-                position: hit.position,
-                positionPercent,
-                matchText: hit.matchText,
-              };
-            }
-          } else {
-            // Got markdown content; search it directly
-            const hit = searchContent(text, MARKDOWN_DIRECTIVE_PATTERN);
-            if (hit) {
-              const positionPercent = text.length > 0 ? hit.position / text.length : 0;
-              return {
-                url: htmlUrl,
-                found: true,
-                source: 'markdown',
-                position: hit.position,
-                positionPercent,
-                matchText: hit.matchText,
-              };
-            }
-          }
-
-          // If the original URL was different (a .md URL), also check it
-          if (url !== htmlUrl) {
-            try {
-              const mdResponse = await ctx.http.fetch(url);
-              if (mdResponse.ok) {
-                const mdText = await mdResponse.text();
-                const hit = searchContent(mdText, MARKDOWN_DIRECTIVE_PATTERN);
-                if (hit) {
-                  const positionPercent = mdText.length > 0 ? hit.position / mdText.length : 0;
-                  return {
-                    url,
-                    found: true,
-                    source: 'markdown',
-                    position: hit.position,
-                    positionPercent,
-                    matchText: hit.matchText,
-                  };
-                }
-              }
-            } catch {
-              // Markdown fetch failed; that's fine, we already checked HTML
-            }
-          }
-
-          return { url: htmlUrl, found: false };
-        } catch (err) {
-          return {
-            url,
-            found: false,
-            error: err instanceof Error ? err.message : String(err),
-          };
-        }
-      }),
-    );
-    results.push(...batchResults);
-  }
-
-  const tested = results.filter((r) => !r.error);
-  const fetchErrors = results.filter((r) => r.error).length;
-  const found = results.filter((r) => r.found);
-  const notFound = tested.filter((r) => !r.found);
-
-  if (tested.length === 0) {
-    return {
-      id,
-      category,
-      status: 'fail',
-      message: `Could not test any pages${fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : ''}`,
-      details: {
-        totalPages,
-        testedPages: results.length,
-        sampled,
-        fetchErrors,
-        pageResults: results,
-        discoveryWarnings: warnings,
-      },
-    };
-  }
-
-  // Classify pages with directives by position
-  const nearTop = found.filter((r) => (r.positionPercent ?? 1) <= TOP_THRESHOLD);
-  const buried = found.filter((r) => (r.positionPercent ?? 0) > DEEP_THRESHOLD);
-
-  let status: 'pass' | 'warn' | 'fail';
-  let message: string;
-  const pageLabel = sampled ? 'sampled pages' : 'pages';
-  const suffix = fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '';
-
-  if (found.length === 0) {
-    status = 'fail';
-    message = `No llms.txt directive found in any of ${tested.length} ${pageLabel}${suffix}`;
-  } else if (buried.length > 0 && nearTop.length === 0) {
-    // All found directives are buried deep
-    status = 'warn';
-    message = `llms.txt directive found in ${found.length} of ${tested.length} ${pageLabel}, but buried deep in the page (past ${Math.round(DEEP_THRESHOLD * 100)}%)${suffix}`;
-  } else if (notFound.length > 0) {
-    // Some pages have directives, some don't
-    status = 'warn';
-    message = `llms.txt directive found in ${found.length} of ${tested.length} ${pageLabel} (${notFound.length} missing)${suffix}`;
-  } else {
-    status = 'pass';
-    message = `llms.txt directive found in all ${tested.length} ${pageLabel}${nearTop.length > 0 ? ', near the top of content' : ''}${suffix}`;
-  }
-
-  return {
-    id,
-    category,
-    status,
-    message,
-    details: {
-      totalPages,
-      testedPages: tested.length,
-      sampled,
-      foundCount: found.length,
-      notFoundCount: notFound.length,
-      nearTopCount: nearTop.length,
-      buriedCount: buried.length,
-      fetchErrors,
-      pageResults: results,
-      discoveryWarnings: warnings,
-    },
-  };
-}
-
-registerCheck({
-  id: 'llms-txt-directive',
-  category: 'content-discoverability',
-  description: 'Whether pages include a directive pointing to llms.txt',
-  dependsOn: [],
-  run: check,
-});
diff --git a/src/checks/index.ts b/src/checks/index.ts
index 342b0c3..ef9ad14 100644
--- a/src/checks/index.ts
+++ b/src/checks/index.ts
@@ -6,7 +6,8 @@ import './content-discoverability/llms-txt-valid.js';
 import './content-discoverability/llms-txt-size.js';
 import './content-discoverability/llms-txt-links-resolve.js';
 import './content-discoverability/llms-txt-links-markdown.js';
-import './content-discoverability/llms-txt-directive.js';
+import './content-discoverability/llms-txt-directive-html.js';
+import './content-discoverability/llms-txt-directive-md.js';
 
 // Category 2: Markdown Availability
 import './markdown-availability/markdown-url-support.js';
diff --git a/src/cli/formatters/text.ts b/src/cli/formatters/text.ts
index d78d31c..f5f3742 100644
--- a/src/cli/formatters/text.ts
+++ b/src/cli/formatters/text.ts
@@ -194,7 +194,21 @@ const DETAIL_FORMATTERS: Record<string, DetailFormatter> = {
       });
   },
 
-  'llms-txt-directive': (details) => {
+  'llms-txt-directive-html': (details) => {
+    const pages = details.pageResults as
+      | Array<{ url: string; found: boolean; positionPercent?: number; error?: string }>
+      | undefined;
+    return (pages ?? [])
+      .filter((p) => !p.found || (p.positionPercent ?? 0) > 0.1) // 0-1 fraction; 0.1 = first 10%
+      .map((p) => {
+        if (p.error) return formatDetailLine('fail', p.url, p.error);
+        if (!p.found) return formatDetailLine('fail', p.url, 'no directive found');
+        const pct = Math.round((p.positionPercent ?? 0) * 100); // whole percent for display
+        return formatDetailLine('warn', p.url, `directive at ${pct}% of page`);
+      });
+  },
+
+  'llms-txt-directive-md': (details) => {
     const pages = details.pageResults as
       | Array<{ url: string; found: boolean; positionPercent?: number; error?: string }>
       | undefined;
diff --git a/src/scoring/coefficients.ts b/src/scoring/coefficients.ts
index 6ed4a9c..2bde2ed 100644
--- a/src/scoring/coefficients.ts
+++ b/src/scoring/coefficients.ts
@@ -40,8 +40,9 @@ function getDiscoveryCoefficient(results: Map<string, CheckResult>): number {
   const cn = results.get('content-negotiation');
   if (cn?.status === 'pass') return 1.0;
 
-  const directive = results.get('llms-txt-directive');
-  if (directive?.status === 'pass') return 0.8;
+  const directiveHtml = results.get('llms-txt-directive-html');
+  const directiveMd = results.get('llms-txt-directive-md');
+  if (directiveHtml?.status === 'pass' || directiveMd?.status === 'pass') return 0.8;
 
   const linksMd = results.get('llms-txt-links-markdown');
   if (linksMd?.status === 'pass') return 0.5;
diff --git a/src/scoring/diagnostics.ts b/src/scoring/diagnostics.ts
index 6834140..cf89375 100644
--- a/src/scoring/diagnostics.ts
+++ b/src/scoring/diagnostics.ts
@@ -12,7 +12,7 @@ interface DiagnosticDefinition {
 
 // Evaluated in this order (dependency order matters)
 const DIAGNOSTIC_DEFINITIONS: DiagnosticDefinition[] = [
-  // --- markdown-undiscoverable must be first (others reference it) ---
+  // --- markdown discovery diagnostics must be first (others reference them) ---
   {
     id: 'markdown-undiscoverable',
     severity: 'warning',
@@ -21,21 +21,47 @@ const DIAGNOSTIC_DEFINITIONS: DiagnosticDefinition[] = [
       if (mdSupport?.status !== 'pass') return false;
 
       const cn = results.get('content-negotiation');
-      const directive = results.get('llms-txt-directive');
-      const linksMd = results.get('llms-txt-links-markdown');
+      const directiveHtml = results.get('llms-txt-directive-html');
 
-      return cn?.status !== 'pass' && directive?.status !== 'pass' && linksMd?.status !== 'pass';
+      return cn?.status !== 'pass' && directiveHtml?.status !== 'pass';
     },
     message: () =>
       'Your site serves markdown at .md URLs, but agents have no way to ' +
-      'discover this. Without content negotiation, an llms.txt directive ' +
-      'on your pages, or .md links in your llms.txt, most agents will ' +
-      'default to the HTML path. Your markdown support is not being utilized.',
+      'discover this. No agent-facing directive points to your llms.txt, ' +
+      'and the server does not support content negotiation. Most agents ' +
+      'will default to the HTML path and never benefit from your markdown ' +
+      'support.',
     resolution:
-      'Add a blockquote directive near the top of each docs page ' +
-      'pointing to your llms.txt, or implement content negotiation for ' +
-      'Accept: text/markdown. Either change makes your existing markdown ' +
-      'support discoverable.',
+      'Add a directive near the top of each docs page pointing to your ' +
+      'llms.txt, and implement content negotiation for Accept: text/markdown. ' +
+      'The directive is the primary discovery mechanism (it reaches all ' +
+      'agents); content negotiation provides a fast path for agents that ' +
+      'request markdown by default.',
+  },
+
+  {
+    id: 'markdown-partially-discoverable',
+    severity: 'warning',
+    triggers: (results) => {
+      const mdSupport = results.get('markdown-url-support');
+      if (mdSupport?.status !== 'pass') return false;
+      // Past this point markdown-url-support is known to have passed.
+      const cn = results.get('content-negotiation');
+      const directiveHtml = results.get('llms-txt-directive-html');
+      // Fire when content negotiation passes but the HTML directive did not pass (or is absent).
+      return cn?.status === 'pass' && directiveHtml?.status !== 'pass';
+    },
+    message: () =>
+      'Your site serves markdown and supports content negotiation, but ' +
+      'has no agent-facing directive on HTML pages pointing to llms.txt. ' +
+      'Agents that send Accept: text/markdown (Claude Code, Cursor, ' +
+      'OpenCode) get markdown automatically, but the majority of agents ' +
+      'fetch HTML by default and have no signal to try the markdown path.',
+    resolution:
+      'Add a directive near the top of each docs page pointing to your ' +
+      'llms.txt. If your site serves markdown, mention that in the ' +
+      'directive too. The directive reaches all agents, not just the ones ' +
+      'that request markdown by default.',
+  },
 
   {
@@ -135,7 +161,11 @@ const DIAGNOSTIC_DEFINITIONS: DiagnosticDefinition[] = [
 
       const mdSupport = results.get('markdown-url-support');
       if (mdSupport?.status === 'fail') return true;
-      if (triggered.has('markdown-undiscoverable')) return true;
+      if (
+        triggered.has('markdown-undiscoverable') ||
+        triggered.has('markdown-partially-discoverable')
+      )
+        return true;
 
       return false;
     },
@@ -191,7 +221,11 @@ const DIAGNOSTIC_DEFINITIONS: DiagnosticDefinition[] = [
 
       const mdSupport = results.get('markdown-url-support');
       if (mdSupport?.status === 'fail') return true;
-      if (triggered.has('markdown-undiscoverable')) return true;
+      if (
+        triggered.has('markdown-undiscoverable') ||
+        triggered.has('markdown-partially-discoverable')
+      )
+        return true;
 
       return false;
     },
diff --git a/src/scoring/proportions.ts b/src/scoring/proportions.ts
index 5dd4d17..609d4a5 100644
--- a/src/scoring/proportions.ts
+++ b/src/scoring/proportions.ts
@@ -75,7 +75,8 @@ const PROPORTION_EXTRACTORS: Record<string, ProportionExtractor> = {
   'markdown-url-support': markdownUrlSupportExtractor,
   'content-negotiation': contentNegotiationExtractor,
   'http-status-codes': httpStatusCodesExtractor,
-  'llms-txt-directive': llmsTxtDirectiveExtractor,
+  'llms-txt-directive-html': llmsTxtDirectiveExtractor,
+  'llms-txt-directive-md': llmsTxtDirectiveExtractor,
 
   // --- Custom extractors for checks with non-standard detail shapes ---
   'rendering-strategy': renderingStrategyExtractor,
diff --git a/src/scoring/resolutions.ts b/src/scoring/resolutions.ts
index b42eeca..88492ef 100644
--- a/src/scoring/resolutions.ts
+++ b/src/scoring/resolutions.ts
@@ -79,16 +79,28 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       'variants so agents receive markdown instead of converted HTML.',
   },
 
-  'llms-txt-directive': {
+  'llms-txt-directive-html': {
     warn: () =>
-      'An llms.txt directive was found on some pages but is missing from ' +
-      'others, or is buried deep in the page. Ensure the directive appears ' +
-      'near the top of every documentation page.',
+      'An llms.txt directive was found in the HTML of some pages but is ' +
+      'missing from others, or is buried deep in the page. Ensure the ' +
+      'directive appears near the top of every documentation page.',
     fail: () =>
-      'No agent-facing directive pointing to llms.txt was detected on any ' +
-      'tested page. Add a blockquote near the top of each page (e.g., ' +
-      '"> For the complete documentation index, see [llms.txt](/llms.txt)"). ' +
-      'This can be visually hidden with CSS while remaining accessible to agents.',
+      'No agent-facing directive pointing to llms.txt was detected in the ' +
+      'HTML of any tested page. Add a visually-hidden element near the top ' +
+      'of each page (e.g., a div with CSS clip-rect) containing a link to ' +
+      'your llms.txt. If your site serves markdown versions of pages, ' +
+      'mention that in the directive too so agents know to request it.',
+  },
+
+  'llms-txt-directive-md': {
+    warn: () =>
+      'An llms.txt directive was found in the markdown of some pages but is ' +
+      'missing from others, or is buried deep in the page. Ensure the ' +
+      'directive appears near the top of every markdown page.',
+    fail: () =>
+      'No llms.txt directive was detected in the markdown of any tested ' +
+      'page. Add a blockquote near the top of each markdown page (e.g., ' +
+      '"> For the complete documentation index, see [llms.txt](/llms.txt)").',
   },
 
   'markdown-url-support': {
diff --git a/src/scoring/tag-scores.ts b/src/scoring/tag-scores.ts
index f2601c6..7591928 100644
--- a/src/scoring/tag-scores.ts
+++ b/src/scoring/tag-scores.ts
@@ -98,7 +98,14 @@ const STATUS_MAPPERS: Record<string, (item: Record<string, unknown>) => string>
     }
   },
 
-  'llms-txt-directive': (item) => {
+  'llms-txt-directive-html': (item) => {
+    if (item.error) return 'skip';
+    if (!item.found) return 'fail';
+    if (typeof item.positionPercent === 'number' && item.positionPercent > 50) return 'warn';
+    return 'pass';
+  },
+
+  'llms-txt-directive-md': (item) => {
     if (item.error) return 'skip';
     if (!item.found) return 'fail';
     if (typeof item.positionPercent === 'number' && item.positionPercent > 50) return 'warn';
diff --git a/src/scoring/weights.ts b/src/scoring/weights.ts
index 4e61bed..3c721f4 100644
--- a/src/scoring/weights.ts
+++ b/src/scoring/weights.ts
@@ -37,7 +37,8 @@ export const CHECK_WEIGHTS: Record<string, CheckWeight> = {
   'page-size-markdown': w('high', 0.5),
   'page-size-html': w('high', 0.5),
   'http-status-codes': w('high'),
-  'llms-txt-directive': w('high', 0.6),
+  'llms-txt-directive-html': w('high', 0.6),
+  'llms-txt-directive-md': w('medium', 0.6),
 
   // Medium
   'llms-txt-valid': w('medium', 0.75),
diff --git a/test/integration/cli.test.ts b/test/integration/cli.test.ts
index 00fa104..f573917 100644
--- a/test/integration/cli.test.ts
+++ b/test/integration/cli.test.ts
@@ -71,6 +71,6 @@ describe('CLI', () => {
     // Should have results from multiple categories
     const categories = new Set(report.results.map((r) => r.category));
     expect(categories.size).toBeGreaterThan(1);
-    expect(report.summary.total).toBe(22);
+    expect(report.summary.total).toBe(23);
   });
 });
diff --git a/test/unit/checks/llms-txt-directive.test.ts b/test/unit/checks/llms-txt-directive-html.test.ts
similarity index 81%
rename from test/unit/checks/llms-txt-directive.test.ts
rename to test/unit/checks/llms-txt-directive-html.test.ts
index 8e531cf..871f72b 100644
--- a/test/unit/checks/llms-txt-directive.test.ts
+++ b/test/unit/checks/llms-txt-directive-html.test.ts
@@ -14,8 +14,8 @@ beforeAll(() => {
   return () => server.close();
 });
 
-describe('llms-txt-directive', () => {
-  const check = getCheck('llms-txt-directive')!;
+describe('llms-txt-directive-html', () => {
+  const check = getCheck('llms-txt-directive-html')!;
 
   function makeCtx(llmsTxtContent?: string) {
     const ctx = createContext('http://test.local', { requestDelay: 0 });
@@ -72,13 +72,13 @@ describe('llms-txt-directive', () => {
     expect(result.message).toContain('near the top');
   });
 
-  it('passes when llms.txt is mentioned as text near the top', async () => {
+  it('passes with visually hidden directive using sr-only', async () => {
     server.use(
       http.get(
         'http://test.local/docs/page1',
         () =>
           new HttpResponse(
-            '<html><body><p>See our llms.txt for a full documentation index.</p><h1>Docs</h1><p>Content...</p></body></html>',
+            '<html><body><span class="sr-only"><a href="/llms.txt">Full documentation index</a></span><h1>Docs</h1><p>Content...</p></body></html>',
             { status: 200, headers: { 'Content-Type': 'text/html' } },
           ),
       ),
@@ -89,13 +89,13 @@ describe('llms-txt-directive', () => {
     expect(result.details?.foundCount).toBe(1);
   });
 
-  it('passes with visually hidden directive using sr-only', async () => {
+  it('passes with full URL link to llms.txt', async () => {
     server.use(
       http.get(
         'http://test.local/docs/page1',
         () =>
           new HttpResponse(
-            '<html><body><span class="sr-only"><a href="/llms.txt">Full documentation index</a></span><h1>Docs</h1><p>Content...</p></body></html>',
+            '<html><body><a href="https://example.com/llms.txt">Documentation Index</a><p>Content</p></body></html>',
             { status: 200, headers: { 'Content-Type': 'text/html' } },
           ),
       ),
@@ -106,6 +106,47 @@ describe('llms-txt-directive', () => {
     expect(result.details?.foundCount).toBe(1);
   });
 
+  it('excludes nav elements from search (fixes sidebar false positive)', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><body>' +
+              '<nav><ul><li id="/ai/llmstxt" data-title="llms.txt"><a href="/docs/ai/llmstxt"><span>llms.txt</span></a></li></ul></nav>' +
+              '<h1>Docs</h1><p>Documentation content here.</p>' +
+              '</body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
+          ),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.foundCount).toBe(0);
+  });
+
+  it('excludes script and style elements from search', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><body>' +
+              '<script type="application/ld+json">{"name": "llms.txt guide"}</script>' +
+              '<style>/* llms.txt styling */</style>' +
+              '<h1>Docs</h1><p>Documentation content here.</p>' +
+              '</body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
+          ),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.foundCount).toBe(0);
+  });
+
   it('warns when directive is buried deep in the page', async () => {
     const padding = '<p>Lorem ipsum dolor sit amet.</p>'.repeat(200);
     server.use(
@@ -190,24 +231,7 @@ describe('llms-txt-directive', () => {
     expect(result.details?.fetchErrors).toBe(1);
   });
 
-  it('detects full URL links to llms.txt', async () => {
-    server.use(
-      http.get(
-        'http://test.local/docs/page1',
-        () =>
-          new HttpResponse(
-            '<html><body><a href="https://example.com/llms.txt">Documentation Index</a><p>Content</p></body></html>',
-            { status: 200, headers: { 'Content-Type': 'text/html' } },
-          ),
-      ),
-    );
-
-    const result = await check.run(makeCtx(llms('/docs/page1')));
-    expect(result.status).toBe('pass');
-    expect(result.details?.foundCount).toBe(1);
-  });
-
-  it('ignores pages without body tags', async () => {
+  it('handles pages without body tags by searching full HTML', async () => {
     server.use(
       http.get(
         'http://test.local/docs/page1',
@@ -219,40 +243,37 @@ describe('llms-txt-directive', () => {
       ),
     );
 
-    // Should still work by falling back to full HTML
     const result = await check.run(makeCtx(llms('/docs/page1')));
     expect(result.status).toBe('pass');
     expect(result.details?.foundCount).toBe(1);
   });
 
-  it('strips .md from URLs and fetches HTML version', async () => {
-    // llms.txt links to .md URLs, but directive is in the HTML version
-    const content = '<p>Documentation content.</p>'.repeat(10);
+  it('ignores non-HTML responses', async () => {
     server.use(
       http.get(
         'http://test.local/docs/page1/',
         () =>
           new HttpResponse(
-            `<html><body><a href="/llms.txt">Documentation index</a><h1>Docs</h1>${content}</body></html>`,
-            { status: 200, headers: { 'Content-Type': 'text/html' } },
+            'For AI agents: see [documentation index](/llms.txt) for navigation.\n\n# Welcome',
+            { status: 200, headers: { 'Content-Type': 'text/markdown' } },
           ),
       ),
     );
 
     const result = await check.run(makeCtx(llms('/docs/page1/index.md')));
-    expect(result.status).toBe('pass');
-    expect(result.details?.foundCount).toBe(1);
+    expect(result.status).toBe('fail');
+    expect(result.details?.foundCount).toBe(0);
   });
 
-  it('finds directive in markdown content when HTML is not available', async () => {
-    // HTML URL returns the markdown directly (some sites do this)
+  it('strips .md from URLs and fetches HTML version', async () => {
+    const content = '<p>Documentation content.</p>'.repeat(10);
     server.use(
       http.get(
         'http://test.local/docs/page1/',
         () =>
           new HttpResponse(
-            'For AI agents: see [documentation index](/llms.txt) for navigation.\n\n# Welcome\n\nContent here.',
-            { status: 200, headers: { 'Content-Type': 'text/markdown' } },
+            `<html><body><a href="/llms.txt">Documentation index</a><h1>Docs</h1>${content}</body></html>`,
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
           ),
       ),
     );
@@ -260,41 +281,22 @@ describe('llms-txt-directive', () => {
     const result = await check.run(makeCtx(llms('/docs/page1/index.md')));
     expect(result.status).toBe('pass');
     expect(result.details?.foundCount).toBe(1);
-    const pages = result.details?.pageResults as Array<{ source?: string }>;
-    expect(pages[0].source).toBe('markdown');
   });
 
-  it('falls back to .md URL when HTML version has no directive', async () => {
-    // Curated pages bypass discovery, so .md URLs reach the check directly.
-    // The check should try the HTML version first, then fall back to the .md URL.
+  it('detects text mention of llms.txt in content area (outside nav)', async () => {
     server.use(
       http.get(
-        'http://test.local/docs/page1/',
-        () =>
-          new HttpResponse('<html><body><h1>Docs</h1><p>No directive here</p></body></html>', {
-            status: 200,
-            headers: { 'Content-Type': 'text/html' },
-          }),
-      ),
-      http.get(
-        'http://test.local/docs/page1/index.md',
+        'http://test.local/docs/page1',
         () =>
           new HttpResponse(
-            'For AI agents: see /llms.txt for a documentation index.\n\n# Docs\n\nContent.',
-            { status: 200, headers: { 'Content-Type': 'text/markdown' } },
+            '<html><body><p>See our llms.txt for a full documentation index.</p><h1>Docs</h1><p>Content...</p></body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
           ),
       ),
     );
 
-    const ctx = createContext('http://test.local', {
-      requestDelay: 0,
-      samplingStrategy: 'curated',
-      curatedPages: ['http://test.local/docs/page1/index.md'],
-    });
-    const result = await check.run(ctx);
+    const result = await check.run(makeCtx(llms('/docs/page1')));
     expect(result.status).toBe('pass');
     expect(result.details?.foundCount).toBe(1);
-    const pages = result.details?.pageResults as Array<{ source?: string }>;
-    expect(pages[0].source).toBe('markdown');
   });
 });
diff --git a/test/unit/checks/llms-txt-directive-md.test.ts b/test/unit/checks/llms-txt-directive-md.test.ts
new file mode 100644
index 0000000..d225be4
--- /dev/null
+++ b/test/unit/checks/llms-txt-directive-md.test.ts
@@ -0,0 +1,287 @@
+import { describe, it, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { createContext } from '../../../src/runner.js';
+import { getCheck } from '../../../src/checks/registry.js';
+import '../../../src/checks/index.js';
+import type { DiscoveredFile } from '../../../src/types.js';
+import { mockSitemapNotFound } from '../../helpers/mock-sitemap-not-found.js';
+
+const server = setupServer();
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' });
+  return () => server.close();
+});
+
+describe('llms-txt-directive-md', () => {
+  const check = getCheck('llms-txt-directive-md')!;
+
+  function makeCtx(llmsTxtContent?: string) {
+    const ctx = createContext('http://test.local', { requestDelay: 0 });
+
+    if (llmsTxtContent) {
+      const discovered: DiscoveredFile[] = [
+        {
+          url: 'http://test.local/llms.txt',
+          content: llmsTxtContent,
+          status: 200,
+          redirected: false,
+        },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered },
+      });
+      mockSitemapNotFound(server, 'http://test.local');
+    } else {
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'fail',
+        message: 'No llms.txt found',
+        details: { discoveredFiles: [] },
+      });
+    }
+
+    // Set markdown-url-support as passing so the dependency is satisfied
+    ctx.previousResults.set('markdown-url-support', {
+      id: 'markdown-url-support',
+      category: 'markdown-availability',
+      status: 'pass',
+      message: 'Markdown supported',
+    });
+
+    return ctx;
+  }
+
+  const llms = (...pages: string[]) =>
+    `# Docs\n## Links\n${pages.map((p, i) => `- [Page ${i + 1}](http://test.local${p}): Page\n`).join('')}`;
+
+  it('passes when directive found in cached markdown content', async () => {
+    const padding = '\n\nLorem ipsum dolor sit amet. '.repeat(20);
+    const ctx = makeCtx(llms('/docs/page1'));
+    // Simulate content already cached by the markdown-url-support check
+    ctx.pageCache.set('http://test.local/docs/page1', {
+      url: 'http://test.local/docs/page1',
+      markdown: {
+        content: `> See [llms.txt](/llms.txt) for the documentation index.\n\n# Welcome${padding}`,
+        source: 'md-url',
+      },
+    });
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.foundCount).toBe(1);
+    expect(result.details?.nearTopCount).toBe(1);
+  });
+
+  it('passes when directive found via .md URL fetch', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse(
+            '> For AI agents: see [documentation index](/llms.txt) for navigation.\n\n# Welcome\n\nContent here.',
+            { status: 200, headers: { 'Content-Type': 'text/markdown' } },
+          ),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('pass');
+    expect(result.details?.foundCount).toBe(1);
+    const pages = result.details?.pageResults as Array<{ mdUrl?: string }>;
+    expect(pages[0].mdUrl).toBe('http://test.local/docs/page1.md');
+  });
+
+  it('passes when directive found via index.md URL', async () => {
+    server.use(
+      http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () =>
+          new HttpResponse(
+            '> For AI agents: see /llms.txt for a documentation index.\n\n# Docs\n\nContent.',
+            { status: 200, headers: { 'Content-Type': 'text/markdown' } },
+          ),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('pass');
+    expect(result.details?.foundCount).toBe(1);
+    const pages = result.details?.pageResults as Array<{ mdUrl?: string }>;
+    expect(pages[0].mdUrl).toBe('http://test.local/docs/page1/index.md');
+  });
+
+  it('passes when directive found via content negotiation', async () => {
+    const padding = '\n\nLorem ipsum dolor sit amet. '.repeat(20);
+    server.use(
+      http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+      http.get('http://test.local/docs/page1', ({ request }) => {
+        const accept = request.headers.get('accept') ?? '';
+        if (accept.includes('text/markdown')) {
+          return new HttpResponse(
+            `> See [llms.txt](/llms.txt) for the documentation index.\n\n# Welcome${padding}`,
+            { status: 200, headers: { 'Content-Type': 'text/markdown' } },
+          );
+        }
+        return new HttpResponse('<html><body><h1>Docs</h1></body></html>', {
+          status: 200,
+          headers: { 'Content-Type': 'text/html' },
+        });
+      }),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('pass');
+    expect(result.details?.foundCount).toBe(1);
+  });
+
+  it('warns when directive is buried deep in markdown', async () => {
+    const padding = 'Lorem ipsum dolor sit amet.\n\n'.repeat(200);
+    server.use(
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse(`# Docs\n\n${padding}> See llms.txt for the index.\n`, {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('warn');
+    expect(result.details?.buriedCount).toBe(1);
+    expect(result.message).toContain('buried deep');
+  });
+
+  it('warns when some pages have directive and some do not', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse('> See [llms.txt](/llms.txt)\n\n# Page 1\n\nContent.', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+      http.get(
+        'http://test.local/docs/page2.md',
+        () =>
+          new HttpResponse('# Page 2\n\nNo directive here.', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    // Also mock the index.md fallbacks as 404
+    server.use(
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+      http.get(
+        'http://test.local/docs/page2/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1', '/docs/page2')));
+    expect(result.status).toBe('warn');
+    expect(result.details?.foundCount).toBe(1);
+    expect(result.details?.notFoundCount).toBe(1);
+    expect(result.message).toContain('missing');
+  });
+
+  it('fails when no directive found in any markdown page', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse('# Welcome\n\nNo directive.', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.foundCount).toBe(0);
+  });
+
+  it('reports error when no markdown version is available', async () => {
+    server.use(
+      http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse('<html><body><h1>Docs</h1></body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.fetchErrors).toBe(1);
+  });
+
+  it('handles curated .md pages', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1/',
+        () =>
+          new HttpResponse('<html><body><h1>Docs</h1></body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () =>
+          new HttpResponse(
+            '> For AI agents: see /llms.txt for a documentation index.\n\n# Docs\n\nContent.',
+            { status: 200, headers: { 'Content-Type': 'text/markdown' } },
+          ),
+      ),
+      http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+    );
+
+    const ctx = createContext('http://test.local', {
+      requestDelay: 0,
+      samplingStrategy: 'curated',
+      curatedPages: ['http://test.local/docs/page1/index.md'],
+    });
+    ctx.previousResults.set('markdown-url-support', {
+      id: 'markdown-url-support',
+      category: 'markdown-availability',
+      status: 'pass',
+      message: 'Markdown supported',
+    });
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.foundCount).toBe(1);
+  });
+});
diff --git a/test/unit/cli/formatters.test.ts b/test/unit/cli/formatters.test.ts
index 6617b80..8bd304d 100644
--- a/test/unit/cli/formatters.test.ts
+++ b/test/unit/cli/formatters.test.ts
@@ -620,11 +620,11 @@ describe('formatText', () => {
       expect(output).not.toContain('https://example.com/docs/public');
     });
 
-    it('shows missing directives for llms-txt-directive', () => {
+    it('shows missing directives for llms-txt-directive-html', () => {
       const report = makeReport({
         results: [
           {
-            id: 'llms-txt-directive',
+            id: 'llms-txt-directive-html',
             category: 'content-discoverability',
             status: 'fail',
             message: 'No directives found',
diff --git a/test/unit/scoring/coefficients.test.ts b/test/unit/scoring/coefficients.test.ts
index 2364fc0..61dd40b 100644
--- a/test/unit/scoring/coefficients.test.ts
+++ b/test/unit/scoring/coefficients.test.ts
@@ -29,15 +29,27 @@ describe('coefficients', () => {
       }
     });
 
-    it('returns 0.8 when llms-txt-directive passes (no content-negotiation)', () => {
-      const results = resultsMap(r('content-negotiation', 'fail'), r('llms-txt-directive', 'pass'));
+    it('returns 0.8 when llms-txt-directive-html passes (no content-negotiation)', () => {
+      const results = resultsMap(
+        r('content-negotiation', 'fail'),
+        r('llms-txt-directive-html', 'pass'),
+      );
+      expect(getCoefficient('page-size-markdown', results)).toBe(0.8);
+    });
+
+    it('returns 0.8 when llms-txt-directive-md passes (no content-negotiation)', () => {
+      const results = resultsMap(
+        r('content-negotiation', 'fail'),
+        r('llms-txt-directive-md', 'pass'),
+      );
       expect(getCoefficient('page-size-markdown', results)).toBe(0.8);
     });
 
     it('returns 0.5 when only llms-txt-links-markdown passes', () => {
       const results = resultsMap(
         r('content-negotiation', 'fail'),
-        r('llms-txt-directive', 'fail'),
+        r('llms-txt-directive-html', 'fail'),
+        r('llms-txt-directive-md', 'fail'),
         r('llms-txt-links-markdown', 'pass'),
       );
       expect(getCoefficient('page-size-markdown', results)).toBe(0.5);
@@ -46,7 +58,8 @@ describe('coefficients', () => {
     it('returns 0.0 when nothing passes', () => {
       const results = resultsMap(
         r('content-negotiation', 'fail'),
-        r('llms-txt-directive', 'fail'),
+        r('llms-txt-directive-html', 'fail'),
+        r('llms-txt-directive-md', 'fail'),
         r('llms-txt-links-markdown', 'fail'),
       );
       expect(getCoefficient('page-size-markdown', results)).toBe(0.0);
@@ -60,7 +73,8 @@ describe('coefficients', () => {
     it('uses highest coefficient when multiple pass', () => {
       const results = resultsMap(
         r('content-negotiation', 'pass'),
-        r('llms-txt-directive', 'pass'),
+        r('llms-txt-directive-html', 'pass'),
+        r('llms-txt-directive-md', 'pass'),
         r('llms-txt-links-markdown', 'pass'),
       );
       expect(getCoefficient('page-size-markdown', results)).toBe(1.0);
diff --git a/test/unit/scoring/diagnostics.test.ts b/test/unit/scoring/diagnostics.test.ts
index b01063f..5253da1 100644
--- a/test/unit/scoring/diagnostics.test.ts
+++ b/test/unit/scoring/diagnostics.test.ts
@@ -16,40 +16,83 @@ function resultsMap(...results: CheckResult[]): Map<string, CheckResult> {
 
 describe('diagnostics', () => {
   describe('markdown-undiscoverable', () => {
-    it('triggers when markdown supported but not discoverable', () => {
+    it('triggers when markdown supported but no directive and no content negotiation', () => {
       const results = resultsMap(
         r('markdown-url-support', 'pass'),
         r('content-negotiation', 'fail'),
-        r('llms-txt-directive', 'fail'),
-        r('llms-txt-links-markdown', 'fail'),
+        r('llms-txt-directive-html', 'fail'),
       );
       const diags = evaluateDiagnostics(results);
       expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeDefined();
     });
 
-    it('does not trigger when content-negotiation passes', () => {
+    it('does not trigger when directive-html passes', () => {
+      const results = resultsMap(
+        r('markdown-url-support', 'pass'),
+        r('content-negotiation', 'fail'),
+        r('llms-txt-directive-html', 'pass'),
+      );
+      const diags = evaluateDiagnostics(results);
+      expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeUndefined();
+    });
+
+    it('does not trigger when content-negotiation passes (partially-discoverable fires instead)', () => {
       const results = resultsMap(
         r('markdown-url-support', 'pass'),
         r('content-negotiation', 'pass'),
-        r('llms-txt-directive', 'fail'),
-        r('llms-txt-links-markdown', 'fail'),
+        r('llms-txt-directive-html', 'fail'),
       );
       const diags = evaluateDiagnostics(results);
       expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeUndefined();
+      expect(diags.find((d) => d.id === 'markdown-partially-discoverable')).toBeDefined();
     });
 
     it('does not trigger when markdown-url-support fails', () => {
       const results = resultsMap(
         r('markdown-url-support', 'fail'),
         r('content-negotiation', 'fail'),
-        r('llms-txt-directive', 'fail'),
-        r('llms-txt-links-markdown', 'fail'),
+        r('llms-txt-directive-html', 'fail'),
       );
       const diags = evaluateDiagnostics(results);
       expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeUndefined();
     });
   });
 
+  describe('markdown-partially-discoverable', () => {
+    it('triggers when content negotiation passes but no HTML directive', () => {
+      const results = resultsMap(
+        r('markdown-url-support', 'pass'),
+        r('content-negotiation', 'pass'),
+        r('llms-txt-directive-html', 'fail'),
+      );
+      const diags = evaluateDiagnostics(results);
+      const diag = diags.find((d) => d.id === 'markdown-partially-discoverable');
+      expect(diag).toBeDefined();
+      expect(diag!.severity).toBe('warning');
+    });
+
+    it('does not trigger when HTML directive passes', () => {
+      const results = resultsMap(
+        r('markdown-url-support', 'pass'),
+        r('content-negotiation', 'pass'),
+        r('llms-txt-directive-html', 'pass'),
+      );
+      const diags = evaluateDiagnostics(results);
+      expect(diags.find((d) => d.id === 'markdown-partially-discoverable')).toBeUndefined();
+    });
+
+    it('does not trigger when content negotiation fails (undiscoverable fires instead)', () => {
+      const results = resultsMap(
+        r('markdown-url-support', 'pass'),
+        r('content-negotiation', 'fail'),
+        r('llms-txt-directive-html', 'fail'),
+      );
+      const diags = evaluateDiagnostics(results);
+      expect(diags.find((d) => d.id === 'markdown-partially-discoverable')).toBeUndefined();
+      expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeDefined();
+    });
+  });
+
   describe('truncated-index', () => {
     it('triggers when llms.txt exists but is too large', () => {
       const results = resultsMap(
@@ -123,7 +166,8 @@ describe('diagnostics', () => {
         }),
         r('markdown-url-support', 'pass'),
         r('content-negotiation', 'fail'),
-        r('llms-txt-directive', 'fail'),
+        r('llms-txt-directive-html', 'fail'),
+        r('llms-txt-directive-md', 'fail'),
         r('llms-txt-links-markdown', 'fail'),
       );
       const diags = evaluateDiagnostics(results);
@@ -169,6 +213,23 @@ describe('diagnostics', () => {
       expect(diag!.message).toContain('0% of links resolve');
     });
 
+    it('triggers when markdown is only partially discoverable', () => {
+      const results = resultsMap(
+        r('llms-txt-exists', 'fail'),
+        r('rendering-strategy', 'fail', {
+          serverRendered: 0,
+          sparseContent: 0,
+          spaShells: 20,
+        }),
+        r('markdown-url-support', 'pass'),
+        r('content-negotiation', 'pass'),
+        r('llms-txt-directive-html', 'fail'),
+      );
+      const diags = evaluateDiagnostics(results);
+      expect(diags.find((d) => d.id === 'markdown-partially-discoverable')).toBeDefined();
+      expect(diags.find((d) => d.id === 'no-viable-path')).toBeDefined();
+    });
+
     it('does not trigger when llms.txt links resolve at 10%+', () => {
       const results = resultsMap(
         r('llms-txt-exists', 'pass'),
@@ -220,18 +281,30 @@ describe('diagnostics', () => {
         r('page-size-html', 'fail', { failBucket: 5 }),
         r('markdown-url-support', 'pass'),
         r('content-negotiation', 'fail'),
-        r('llms-txt-directive', 'fail'),
+        r('llms-txt-directive-html', 'fail'),
+        r('llms-txt-directive-md', 'fail'),
         r('llms-txt-links-markdown', 'fail'),
       );
       const diags = evaluateDiagnostics(results);
       expect(diags.find((d) => d.id === 'page-size-no-markdown-escape')).toBeDefined();
     });
 
-    it('does not trigger when markdown is discoverable', () => {
+    it('triggers when markdown is only partially discoverable (content negotiation but no directive)', () => {
       const results = resultsMap(
         r('page-size-html', 'fail', { failBucket: 5 }),
         r('markdown-url-support', 'pass'),
         r('content-negotiation', 'pass'),
+        r('llms-txt-directive-html', 'fail'),
+      );
+      const diags = evaluateDiagnostics(results);
+      expect(diags.find((d) => d.id === 'page-size-no-markdown-escape')).toBeDefined();
+    });
+
+    it('does not trigger when markdown is discoverable via directive', () => {
+      const results = resultsMap(
+        r('page-size-html', 'fail', { failBucket: 5 }),
+        r('markdown-url-support', 'pass'),
+        r('llms-txt-directive-html', 'pass'),
       );
       const diags = evaluateDiagnostics(results);
       expect(diags.find((d) => d.id === 'page-size-no-markdown-escape')).toBeUndefined();
diff --git a/test/unit/scoring/proportions.test.ts b/test/unit/scoring/proportions.test.ts
index 061a70d..4713447 100644
--- a/test/unit/scoring/proportions.test.ts
+++ b/test/unit/scoring/proportions.test.ts
@@ -170,10 +170,10 @@ describe('proportions', () => {
     });
   });
 
-  describe('llms-txt-directive', () => {
+  describe('llms-txt-directive-html', () => {
     it('maps found boolean and position to pass/warn/fail', () => {
       const result = getCheckProportion(
-        makeResult('llms-txt-directive', 'warn', {
+        makeResult('llms-txt-directive-html', 'warn', {
           pageResults: [
             { url: '/a', found: true, positionPercent: 5 },
             { url: '/b', found: true, positionPercent: 60 },
@@ -189,7 +189,7 @@ describe('proportions', () => {
 
     it('excludes pages with errors', () => {
       const result = getCheckProportion(
-        makeResult('llms-txt-directive', 'pass', {
+        makeResult('llms-txt-directive-html', 'pass', {
           pageResults: [
             { url: '/a', found: true, positionPercent: 5 },
             { url: '/b', error: 'fetch failed' },
diff --git a/test/unit/scoring/resolutions.test.ts b/test/unit/scoring/resolutions.test.ts
index f848ea6..30befba 100644
--- a/test/unit/scoring/resolutions.test.ts
+++ b/test/unit/scoring/resolutions.test.ts
@@ -70,7 +70,8 @@ describe('resolutions', () => {
       'llms-txt-size',
       'llms-txt-links-resolve',
       'llms-txt-links-markdown',
-      'llms-txt-directive',
+      'llms-txt-directive-html',
+      'llms-txt-directive-md',
       'markdown-url-support',
       'content-negotiation',
       'rendering-strategy',
diff --git a/test/unit/scoring/score.test.ts b/test/unit/scoring/score.test.ts
index 3f83740..d1de347 100644
--- a/test/unit/scoring/score.test.ts
+++ b/test/unit/scoring/score.test.ts
@@ -49,7 +49,8 @@ describe('computeScore', () => {
       makeResult('llms-txt-size', 'content-discoverability', 'pass'),
       makeResult('llms-txt-links-resolve', 'content-discoverability', 'pass'),
       makeResult('llms-txt-links-markdown', 'content-discoverability', 'pass'),
-      makeResult('llms-txt-directive', 'content-discoverability', 'pass'),
+      makeResult('llms-txt-directive-html', 'content-discoverability', 'pass'),
+      makeResult('llms-txt-directive-md', 'content-discoverability', 'pass'),
       makeResult('markdown-url-support', 'markdown-availability', 'pass'),
       makeResult('content-negotiation', 'markdown-availability', 'pass'),
       makeResult('rendering-strategy', 'page-size', 'pass', {
@@ -228,7 +229,8 @@ describe('computeScore', () => {
     // is not discoverable
     const results: CheckResult[] = [
       makeResult('content-negotiation', 'markdown-availability', 'fail'),
-      makeResult('llms-txt-directive', 'content-discoverability', 'fail'),
+      makeResult('llms-txt-directive-html', 'content-discoverability', 'fail'),
+      makeResult('llms-txt-directive-md', 'content-discoverability', 'fail'),
       makeResult('llms-txt-links-markdown', 'content-discoverability', 'fail'),
       makeResult('page-size-markdown', 'page-size', 'pass'),
     ];
@@ -279,7 +281,8 @@ describe('computeScore', () => {
     const results: CheckResult[] = [
       makeResult('markdown-url-support', 'markdown-availability', 'pass'),
       makeResult('content-negotiation', 'markdown-availability', 'fail'),
-      makeResult('llms-txt-directive', 'content-discoverability', 'fail'),
+      makeResult('llms-txt-directive-html', 'content-discoverability', 'fail'),
+      makeResult('llms-txt-directive-md', 'content-discoverability', 'fail'),
       makeResult('llms-txt-links-markdown', 'content-discoverability', 'fail'),
     ];
     const score = computeScore(makeReport(results));
@@ -356,7 +359,8 @@ describe('computeScore', () => {
           markdownRate: 0,
           testedLinks: 20,
         }),
-        makeResult('llms-txt-directive', 'content-discoverability', 'fail'),
+        makeResult('llms-txt-directive-html', 'content-discoverability', 'fail'),
+        makeResult('llms-txt-directive-md', 'content-discoverability', 'fail'),
 
         // No markdown
         makeResult('markdown-url-support', 'markdown-availability', 'fail'),
@@ -405,7 +409,7 @@ describe('computeScore', () => {
 
       // Should have resolutions for failing checks
       expect(score.resolutions['llms-txt-links-markdown']).toBeDefined();
-      expect(score.resolutions['llms-txt-directive']).toBeDefined();
+      expect(score.resolutions['llms-txt-directive-html']).toBeDefined();
       expect(score.resolutions['markdown-url-support']).toBeDefined();
     });
   });
diff --git a/test/unit/scoring/tag-scores.test.ts b/test/unit/scoring/tag-scores.test.ts
index fa558da..fa0250c 100644
--- a/test/unit/scoring/tag-scores.test.ts
+++ b/test/unit/scoring/tag-scores.test.ts
@@ -322,8 +322,8 @@ describe('computeTagScores', () => {
             { url: 'https://example.com/b', classification: 'soft-404' },
           ],
         }),
-        // llms-txt-directive: found near top -> pass, found deep -> warn, not found -> fail
-        makeResult('llms-txt-directive', 'content-discoverability', 'warn', {
+        // llms-txt-directive-html: found near top -> pass, found deep -> warn, not found -> fail
+        makeResult('llms-txt-directive-html', 'content-discoverability', 'warn', {
           pageResults: [
             { url: 'https://example.com/a', found: true, positionPercent: 5 },
             { url: 'https://example.com/b', found: true, positionPercent: 80 },
@@ -348,7 +348,7 @@ describe('computeTagScores', () => {
     const checkScores: Record<string, CheckScore> = {
       'markdown-url-support': makeCheckScore(7),
       'http-status-codes': makeCheckScore(7),
-      'llms-txt-directive': makeCheckScore(7),
+      'llms-txt-directive-html': makeCheckScore(7),
       'cache-header-hygiene': makeCheckScore(2),
     };
 
@@ -366,8 +366,8 @@ describe('computeTagScores', () => {
     const httpStatus = checks.find((c) => c.checkId === 'http-status-codes')!;
     expect(httpStatus.proportion).toBe(0.5);
 
-    // llms-txt-directive: 1 pass, 1 warn, 1 fail -> (1 + 0.6*1) / 3
-    const directive = checks.find((c) => c.checkId === 'llms-txt-directive')!;
+    // llms-txt-directive-html: 1 pass, 1 warn, 1 fail -> (1 + 0.6*1) / 3
+    const directive = checks.find((c) => c.checkId === 'llms-txt-directive-html')!;
     expect(directive.pages[0].status).toBe('pass');
     expect(directive.pages[1].status).toBe('warn');
     expect(directive.pages[2].status).toBe('fail');
@@ -407,8 +407,8 @@ describe('computeTagScores', () => {
         makeResult('http-status-codes', 'url-stability', 'pass', {
           pageResults: [{ url: 'https://example.com/a', classification: 'unknown-value' }],
         }),
-        // llms-txt-directive: error -> skip
-        makeResult('llms-txt-directive', 'content-discoverability', 'pass', {
+        // llms-txt-directive-html: error -> skip
+        makeResult('llms-txt-directive-html', 'content-discoverability', 'pass', {
           pageResults: [{ url: 'https://example.com/a', error: 'fetch failed' }],
         }),
         // section-header-quality: hasCrossGroupGeneric -> warn
@@ -430,7 +430,7 @@ describe('computeTagScores', () => {
       'redirect-behavior': makeCheckScore(4),
       'auth-gate-detection': makeCheckScore(10),
       'http-status-codes': makeCheckScore(7),
-      'llms-txt-directive': makeCheckScore(7),
+      'llms-txt-directive-html': makeCheckScore(7),
       'section-header-quality': makeCheckScore(2),
     };
 
diff --git a/test/unit/scoring/weights.test.ts b/test/unit/scoring/weights.test.ts
index 8b76222..5a25ceb 100644
--- a/test/unit/scoring/weights.test.ts
+++ b/test/unit/scoring/weights.test.ts
@@ -2,8 +2,8 @@ import { describe, it, expect } from 'vitest';
 import { CHECK_WEIGHTS, getCheckWeight } from '../../../src/scoring/weights.js';
 
 describe('weights', () => {
-  it('has weights for all 22 checks', () => {
-    expect(Object.keys(CHECK_WEIGHTS)).toHaveLength(22);
+  it('has weights for all 23 checks', () => {
+    expect(Object.keys(CHECK_WEIGHTS)).toHaveLength(23);
   });
 
   it('returns undefined for unknown check IDs', () => {
@@ -24,24 +24,24 @@ describe('weights', () => {
     expect(getCheckWeight('cache-header-hygiene')!.weight).toBe(2);
   });
 
-  it('has 3 critical, 7 high, 9 medium, 3 low checks', () => {
+  it('has 3 critical, 8 high, 10 medium, 2 low checks', () => {
     const tiers = Object.values(CHECK_WEIGHTS).map((w) => w.tier);
     expect(tiers.filter((t) => t === 'critical')).toHaveLength(3);
     expect(tiers.filter((t) => t === 'high')).toHaveLength(8);
-    expect(tiers.filter((t) => t === 'medium')).toHaveLength(9);
+    expect(tiers.filter((t) => t === 'medium')).toHaveLength(10);
     expect(tiers.filter((t) => t === 'low')).toHaveLength(2);
   });
 
-  it('sums to 126 max raw score', () => {
+  it('sums to 130 max raw score', () => {
     const total = Object.values(CHECK_WEIGHTS).reduce((sum, w) => sum + w.weight, 0);
-    expect(total).toBe(126);
+    expect(total).toBe(130);
   });
 
   it('assigns warn coefficients correctly', () => {
     // 0.75 tier
     expect(getCheckWeight('llms-txt-valid')!.warnCoefficient).toBe(0.75);
     // 0.60 tier
-    expect(getCheckWeight('llms-txt-directive')!.warnCoefficient).toBe(0.6);
+    expect(getCheckWeight('llms-txt-directive-html')!.warnCoefficient).toBe(0.6);
     // 0.50 tier
     expect(getCheckWeight('llms-txt-exists')!.warnCoefficient).toBe(0.5);
     // 0.25 tier

From 6dee3a9796c8c60b8e8ebac86781820aa0129324 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sat, 25 Apr 2026 16:54:57 -0400
Subject: [PATCH 05/13] Add single-page scoring reliability and other
 diagnostics

---
 SCORING.md                                    |  52 +++-
 docs/agent-score-calculation.md               |  15 ++
 docs/interaction-diagnostics.md               |  42 ++++
 docs/reference/programmatic-api.md            |   3 +
 docs/reference/scoring-api.md                 |  27 +-
 docs/what-is-agent-score.md                   |   2 +-
 scoring-reference.md                          |  87 ++++++-
 .../llms-txt-links-resolve.ts                 |  22 ++
 src/cli/formatters/scorecard.ts               |   7 +-
 src/index.ts                                  |   2 +
 src/runner.ts                                 |   3 +
 src/scoring/diagnostics.ts                    | 154 +++++++++++-
 src/scoring/index.ts                          |   1 +
 src/scoring/score.ts                          |  55 +++-
 src/scoring/types.ts                          |   8 +-
 src/types.ts                                  |   4 +
 test/unit/cli/scorecard-formatter.test.ts     |  55 ++++
 test/unit/scoring/diagnostics.test.ts         | 235 ++++++++++++++++--
 test/unit/scoring/score.test.ts               | 135 +++++++++-
 19 files changed, 848 insertions(+), 61 deletions(-)

diff --git a/SCORING.md b/SCORING.md
index 85879fa..1fa67b5 100644
--- a/SCORING.md
+++ b/SCORING.md
@@ -121,7 +121,23 @@ For checks that test multiple pages (like `page-size-html` or `rendering-strateg
 score = (sum of check scores) / (sum of weights for non-skipped checks) × 100
 ```
 
-Rounded to the nearest integer.
+Rounded to the nearest integer. Checks marked as `notApplicable` (see below) are excluded from both numerator and denominator.
+
+### Insufficient-data handling (scoreDisplayMode)
+
+When the tool discovers only a single page using automatic discovery (`random` or `deterministic` sampling), page-level check scores are unreliable because they represent one page out of potentially thousands. In this case:
+
+- **Page-level checks** get `scoreDisplayMode: "notApplicable"` and are excluded from the overall score calculation.
+- **Site-level checks** (llms.txt checks, coverage, auth-alternative-access) remain `scoreDisplayMode: "numeric"` and are scored normally.
+- **Category scores** where all checks are `notApplicable` become `null` and render as a dash in the scorecard.
+- **Categories with a mix** of page-level and site-level checks are scored on the site-level checks only.
+
+This follows the Lighthouse convention: don't present a number when the data behind it isn't meaningful.
+
+This behavior does **not** apply when you run with:
+
+- `--sampling curated` or `--urls`: the user explicitly chose pages to test.
+- `--sampling none`: the user opted out of sampling entirely.
 
 ### Warn coefficients
 
@@ -155,6 +171,8 @@ Some problems are severe enough that no amount of other good behavior should com
 
 When multiple caps apply, the lowest one wins.
 
+The `rendering-strategy` and `auth-gate-detection` caps do not apply when the check has `scoreDisplayMode: "notApplicable"` (insufficient data). If we don't trust the data enough to include it in the score, we don't trust it enough to cap the score either.
+
 ## Interaction diagnostics
 
 Some problems only become visible when you look at multiple checks together. The scorecard surfaces these as **interaction diagnostics**: system-level findings that emerge from combinations of check results.
@@ -217,6 +235,38 @@ Some problems only become visible when you look at multiple checks together. The
 
 **What to do**: Either reduce HTML page sizes (break large pages, reduce inline CSS/JS) or provide markdown versions and make them discoverable.
 
+### Single-page sample
+
+**Triggers when** automatic discovery (`random` or `deterministic` sampling) found only one page to test.
+
+**What it means**: Page-level category scores (page size, content structure, URL stability, etc.) are based on a single page and may not represent the site. These categories are marked as N/A in the score.
+
+**What to do**: If your site has an llms.txt, ensure it contains working links so the tool can discover more pages. If testing a preview deployment, use `--canonical-origin` to rewrite cross-origin llms.txt links. You can also provide specific pages with `--urls`.
+
+### All llms.txt links are cross-origin
+
+**Triggers when** every link in your llms.txt points to a different origin than the one being tested.
+
+**What it means**: This typically happens when testing a preview or staging deployment whose llms.txt still references the production domain. The tool filters cross-origin links during page discovery, so it falls back to testing a single page.
+
+**What to do**: Use `--canonical-origin <production-origin>` to rewrite cross-origin links during testing.
+
+### Gzipped sitemap skipped
+
+**Triggers when** a gzipped sitemap (e.g. `sitemap.xml.gz`) was encountered during URL discovery and skipped because gzipped sitemaps are not yet supported.
+
+**What it means**: If the gzipped sitemap is the only sitemap source, URL discovery may have found fewer pages than expected.
+
+**What to do**: Provide an uncompressed `sitemap.xml` alongside the gzipped version, or supply specific pages via `--urls`.
+
+### Severe rate limiting
+
+**Triggers when** more than 20% of tested URLs returned HTTP 429 (Too Many Requests).
+
+**What it means**: The target site is rate-limiting requests from the tool. Check results may be unreliable because rate-limited requests are not retried indefinitely.
+
+**What to do**: Increase `--request-delay` to slow down requests, or contact the site operator to allowlist your IP or user-agent for testing.
+
 ## Cluster coefficients
 
 Some checks have **conditional value**: their contribution depends on whether the conditions needed to realize that value are actually met. The score accounts for this through cluster coefficients that scale a check's contribution up or down.
diff --git a/docs/agent-score-calculation.md b/docs/agent-score-calculation.md
index 99f020e..a9cb647 100644
--- a/docs/agent-score-calculation.md
+++ b/docs/agent-score-calculation.md
@@ -16,6 +16,7 @@ Each check earns a proportion of its weight based on its result:
 - **Warn**: Partial weight (see [warn coefficients](#warn-coefficients) below)
 - **Fail**: Zero
 - **Skip**: Excluded from both the numerator and denominator
+- **Not applicable**: Excluded from both the numerator and denominator (see [insufficient data](#insufficient-data) below)
 
 The score is rounded to the nearest integer and mapped to a [letter grade](/what-is-agent-score#letter-grades).
 
@@ -157,6 +158,20 @@ Some problems are severe enough that no amount of other passing checks should co
 
 When multiple caps apply, the lowest one wins.
 
+The `rendering-strategy` and `auth-gate-detection` caps do not apply when the check is marked as not applicable due to [insufficient data](#insufficient-data). If there isn't enough data to include the check in the score, there isn't enough data to cap the score based on it either.
+
+## Insufficient data
+
+When automatic page discovery finds only a single page (using `random` or `deterministic` sampling), page-level check scores are unreliable because they represent one page out of potentially thousands. In this case:
+
+- **Page-level checks** (those that test sampled pages like `page-size-html`, `rendering-strategy`, `http-status-codes`, etc.) are marked as "not applicable" and excluded from the score.
+- **Site-level checks** (llms.txt checks, coverage, auth-alternative-access) are scored normally.
+- **Category scores** where all checks are not applicable display as a dash instead of a number.
+
+This typically happens when a site has no llms.txt or its llms.txt links point to a different origin (common with preview deployments). A [`single-page-sample` diagnostic](/interaction-diagnostics#single-page-sample) fires to explain the situation.
+
+This behavior does not apply when you explicitly choose pages with `--urls` or `--sampling curated`, or when you use `--sampling none`. If you intentionally test a single page, the score reflects that page.
+
 ## Cluster coefficients
 
 Some checks only matter if agents can actually reach the content they measure. If agents can't discover your markdown path, measuring markdown quality is pointless. The score handles this through cluster coefficients that scale both a check's score and its weight proportionally.
diff --git a/docs/interaction-diagnostics.md b/docs/interaction-diagnostics.md
index 186b4e3..432a784 100644
--- a/docs/interaction-diagnostics.md
+++ b/docs/interaction-diagnostics.md
@@ -73,3 +73,45 @@ These diagnostics appear in the "Interaction Diagnostics" section of the `--form
 **What to do**: Either reduce HTML page sizes (break large pages into smaller ones, move inline CSS/JS to external files) or provide markdown versions and make them discoverable via content negotiation or llms.txt links. See [Page Size checks](/checks/page-size) for the specific thresholds.
 
 **Score impact**: No direct score cap, but the combination of failing page-size checks with no markdown alternative typically results in low category scores for both Page Size and Markdown Availability.
+
+## Single-page sample
+
+**Triggers when** automatic page discovery (`random` or `deterministic` sampling) found only one page to test.
+
+**What it means**: Page-level category scores (Page Size, Content Structure, URL Stability, etc.) are based on a single page and may not represent the site. These categories are marked as N/A in the score rather than showing potentially misleading numbers.
+
+**What to do**: If your site has an llms.txt, ensure it contains working links so the tool can discover more pages. If testing a preview deployment, use `--canonical-origin` to rewrite cross-origin llms.txt links. You can also provide specific pages with `--urls` to test exactly the pages you care about.
+
+This diagnostic does not fire when you explicitly choose pages with `--urls` or `--sampling curated`, or when you opt out of sampling with `--sampling none`.
+
+**Score impact**: Page-level checks are excluded from the overall score and their categories show as N/A. Only site-level checks (llms.txt checks, coverage, auth-alternative-access) contribute to the score.
+
+## All llms.txt links are cross-origin
+
+**Triggers when** every link in your llms.txt points to a different origin than the one being tested.
+
+**What it means**: This typically happens when testing a preview or staging deployment whose llms.txt still references the production domain. The tool filters cross-origin links during page discovery, so it falls back to testing a single page. You'll usually see this alongside the [single-page sample](#single-page-sample) diagnostic.
+
+**What to do**: Use `--canonical-origin <production-origin>` to rewrite cross-origin links during testing. For example: `npx afdocs check https://preview.example.com --canonical-origin https://docs.example.com`.
+
+**Score impact**: Indirect. By reducing discovered pages to one, it triggers the single-page sample behavior described above.
+
+## Gzipped sitemap skipped
+
+**Triggers when** a gzipped sitemap (e.g. `sitemap.xml.gz`) was encountered during URL discovery and skipped because gzipped sitemaps are not yet supported.
+
+**What it means**: If the gzipped sitemap is the only sitemap source, URL discovery may have found fewer pages than expected. This can reduce the representativeness of page-level check results.
+
+**What to do**: Provide an uncompressed `sitemap.xml` alongside the gzipped version, or supply specific pages via `--urls` for targeted testing.
+
+**Score impact**: No direct score impact, but fewer discovered pages may reduce the representativeness of results.
+
+## Severe rate limiting
+
+**Triggers when** more than 20% of tested URLs returned HTTP 429 (Too Many Requests) across all checks that make HTTP requests.
+
+**What it means**: The target site is rate-limiting requests from the tool. Check results may be unreliable because rate-limited requests are not retried indefinitely, so some pages may not have been fully tested.
+
+**What to do**: Increase `--request-delay` to slow down requests (the default is 200ms), or contact the site operator to allowlist your IP or user-agent for testing.
+
+**Score impact**: No direct score impact, but rate-limited requests may cause checks to report incomplete data, leading to scores that don't reflect the site's actual state.
diff --git a/docs/reference/programmatic-api.md b/docs/reference/programmatic-api.md
index 301d220..809dabe 100644
--- a/docs/reference/programmatic-api.md
+++ b/docs/reference/programmatic-api.md
@@ -23,6 +23,8 @@ for (const result of report.results) {
 - `timestamp` — when the check ran
 - `results` — array of `CheckResult` objects (one per check)
 - `summary` — counts by status (pass, warn, fail, skip, error)
+- `testedPages` — number of pages tested by page-level checks (present when page discovery ran)
+- `samplingStrategy` — the sampling strategy used (`random`, `deterministic`, `curated`, or `none`)
 
 ## Run with options
 
@@ -104,6 +106,7 @@ import type {
   ReportResult,
   RunnerOptions,
   CheckOptions,
+  SamplingStrategy, // 'random' | 'deterministic' | 'curated' | 'none'
   AgentDocsConfig,
   CuratedPageEntry,
   PageConfigEntry,
diff --git a/docs/reference/scoring-api.md b/docs/reference/scoring-api.md
index 3bcdafe..bbaa142 100644
--- a/docs/reference/scoring-api.md
+++ b/docs/reference/scoring-api.md
@@ -12,7 +12,9 @@ const score = computeScore(report);
 
 console.log(score.overall); // 72
 console.log(score.grade); // 'C'
-console.log(score.categoryScores); // { 'content-discoverability': { score: 80, grade: 'B' }, ... }
+console.log(score.categoryScores);
+// { 'content-discoverability': { score: 80, grade: 'B' }, ... }
+// Categories may have null score/grade when all checks lack sufficient data
 console.log(score.diagnostics); // [{ id: 'markdown-undiscoverable', severity: 'warning', ... }]
 console.log(score.resolutions); // { 'llms-txt-directive-html': 'Add a visually-hidden element...' }
 ```
@@ -33,16 +35,18 @@ This is the same function; the subpath is provided for consumers who want a narr
 
 `computeScore` returns a `ScoreResult` with these fields:
 
-| Field            | Type                            | Description                                                               |
-| ---------------- | ------------------------------- | ------------------------------------------------------------------------- |
-| `overall`        | `number`                        | The overall score (0-100)                                                 |
-| `grade`          | `Grade`                         | Letter grade (`A+`, `A`, `B`, `C`, `D`, `F`)                              |
-| `categoryScores` | `Record<string, CategoryScore>` | Per-category score and grade                                              |
-| `checkScores`    | `Record<string, CheckScore>`    | Per-check scoring details (weight, coefficient, proportion, earned score) |
-| `diagnostics`    | `Diagnostic[]`                  | Interaction diagnostics that fired                                        |
-| `caps`           | `ScoreCap[]`                    | Score caps that were applied                                              |
-| `resolutions`    | `Record<string, string>`        | Fix suggestions keyed by check ID                                         |
-| `tagScores`      | `Record<string, TagScore>`      | Per-tag aggregate scores (present when curated pages have tags)           |
+| Field            | Type                            | Description                                                                                                                           |
+| ---------------- | ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
+| `overall`        | `number`                        | The overall score (0-100)                                                                                                             |
+| `grade`          | `Grade`                         | Letter grade (`A+`, `A`, `B`, `C`, `D`, `F`)                                                                                          |
+| `categoryScores` | `Record<string, CategoryScore>` | Per-category score and grade. `score` and `grade` are `null` when all checks in the category are `notApplicable` (insufficient data). |
+| `checkScores`    | `Record<string, CheckScore>`    | Per-check scoring details (weight, coefficient, proportion, earned score, scoreDisplayMode)                                           |
+| `diagnostics`    | `Diagnostic[]`                  | Interaction diagnostics that fired                                                                                                    |
+| `cap`            | `ScoreCap`                      | Score cap that was applied (present only when a cap reduced the score)                                                                |
+| `resolutions`    | `Record<string, string>`        | Fix suggestions keyed by check ID                                                                                                     |
+| `tagScores`      | `Record<string, TagScore>`      | Per-tag aggregate scores (present when curated pages have tags)                                                                       |
+
+Each `CheckScore` includes a `scoreDisplayMode` field (`"numeric"` or `"notApplicable"`). When automatic page discovery finds only one page, page-level checks are marked `"notApplicable"` and excluded from overall and category score calculations. See [Insufficient data](/agent-score-calculation#insufficient-data) for details.
 
 ## TagScore
 
@@ -105,6 +109,7 @@ import type {
   Diagnostic,
   DiagnosticSeverity, // 'info' | 'warning' | 'critical'
   Grade, // 'A+' | 'A' | 'B' | 'C' | 'D' | 'F'
+  ScoreDisplayMode, // 'numeric' | 'notApplicable'
 } from 'afdocs';
 ```
 
diff --git a/docs/what-is-agent-score.md b/docs/what-is-agent-score.md
index 43fcaad..524d760 100644
--- a/docs/what-is-agent-score.md
+++ b/docs/what-is-agent-score.md
@@ -79,7 +79,7 @@ Agent-Friendly Docs Scorecard
             Fix: Add a blockquote near the top of each page ...
 ```
 
-The [Interaction Diagnostics](/interaction-diagnostics) section covers amplification effects between checks. When some check failures compound, the agent impact is more pronounced than individual check failures imply. This includes things like having markdown support that agents can't discover, or page sizes that exceed limits with no alternate format available.
+The [Interaction Diagnostics](/interaction-diagnostics) section covers amplification effects between checks. When some check failures compound, the agent impact is more pronounced than individual check failures imply. This includes things like having markdown support that agents can't discover, page sizes that exceed limits with no alternate format available, or the tool discovering only a single page to test (which causes page-level categories to display as N/A rather than showing potentially misleading scores).
 
 ## What to do with your score
 
diff --git a/scoring-reference.md b/scoring-reference.md
index 7cd3c52..07c376b 100644
--- a/scoring-reference.md
+++ b/scoring-reference.md
@@ -193,13 +193,46 @@ same destination are deduplicated before scoring (e.g., if `/docs/llms.txt`
 ### Overall Score
 
 ```
-score = (sum of check_scores for non-skipped checks)
-      / (sum of weights for non-skipped checks)
+score = (sum of check_scores for non-skipped, non-N/A checks)
+      / (sum of weights for non-skipped, non-N/A checks)
       * 100
 ```
 
 Rounded to the nearest integer.
 
+### Score Display Mode (Insufficient Data)
+
+Each `CheckScore` has a `scoreDisplayMode` field:
+
+- `"numeric"` (default): normal scored result.
+- `"notApplicable"`: insufficient data to score meaningfully. The check ran
+  but its score is excluded from the overall and category calculations.
+
+The `notApplicable` mode applies when all of the following hold:
+
+- `samplingStrategy` is `random` or `deterministic` (discovery-based).
+- `testedPages` equals 1.
+- The check is page-level (tests sampled pages, not site-level resources).
+
+Page-level checks: `llms-txt-directive-html`, `llms-txt-directive-md`,
+`markdown-url-support`, `content-negotiation`, `markdown-code-fence-validity`,
+`page-size-markdown`, `page-size-html`, `markdown-content-parity`,
+`content-start-position`, `tabbed-content-serialization`,
+`section-header-quality`, `http-status-codes`, `redirect-behavior`,
+`rendering-strategy`, `auth-gate-detection`, `cache-header-hygiene`.
+
+Site-level checks (always `numeric`): `llms-txt-exists`, `llms-txt-valid`,
+`llms-txt-size`, `llms-txt-links-resolve`, `llms-txt-links-markdown`,
+`llms-txt-coverage`, `auth-alternative-access`.
+
+**Category scores**: When all scored checks in a category are `notApplicable`,
+the category score is `null` (rendered as a dash in the scorecard). Mixed
+categories (some N/A, some numeric) score based on numeric checks only.
+
+**ReportResult fields**: `testedPages` (number of pages tested by page-level
+checks) and `samplingStrategy` (the strategy used for this run) are added to
+`ReportResult` so the scoring layer can detect the insufficient-data condition.
+
 ### Critical Check Score Caps
 
 Critical checks (weight 10) can cap the overall score when they fail broadly.
@@ -213,6 +246,8 @@ or its **status** (for single-resource checks):
 For each critical check:
   if single-resource AND status == fail:
     apply cap (total failure)
+  if multi-page AND scoreDisplayMode == 'notApplicable':
+    skip (insufficient data to justify a cap)
   if multi-page AND proportion <= 0.25 (75%+ of pages fail):
     cap overall score at 39 (F)
   if multi-page AND proportion <= 0.50 (50%+ of pages fail):
@@ -516,6 +551,54 @@ in dependency order: `markdown-undiscoverable` and
   inline CSS/JS), or provide markdown versions and ensure agents can discover
   them via content negotiation or an llms.txt directive.
 
+#### `single-page-sample`
+
+- **Severity**: warning
+- **Triggers when**: `samplingStrategy` is `random` or `deterministic` AND
+  `testedPages` equals 1.
+- **Message**: Only one page was discovered and tested. Page-level category
+  scores are based on a single page and may not represent the site. These
+  categories are marked as N/A in the score.
+- **Resolution**: If your site has an llms.txt, ensure it contains working
+  links so the tool can discover more pages. If testing a preview deployment,
+  use --canonical-origin to rewrite cross-origin llms.txt links. You can also
+  provide specific pages with --urls.
+
+#### `cross-origin-llms-txt`
+
+- **Severity**: warning
+- **Triggers when**: `llms-txt-links-resolve` ran AND its details show
+  `sameOrigin.total === 0` AND `crossOrigin.total > 0`.
+- **Message**: All {n} links in your llms.txt point to {dominant_origin}, not
+  the origin being tested. This typically happens when testing a preview or
+  staging deployment whose llms.txt still references the production domain.
+  Page discovery falls back to a single page.
+- **Resolution**: Use --canonical-origin <production-origin> to rewrite
+  cross-origin links during testing.
+
+#### `gzipped-sitemap-skipped`
+
+- **Severity**: info
+- **Triggers when**: Any check's `details.discoveryWarnings` array contains
+  a string matching "gzipped sitemap".
+- **Message**: A gzipped sitemap was skipped during URL discovery. If this
+  is the only sitemap source, it may have reduced the number of pages
+  discovered for testing.
+- **Resolution**: Provide an uncompressed sitemap.xml alongside the gzipped
+  version, or supply specific pages via --urls for targeted testing.
+
+#### `rate-limiting-severe`
+
+- **Severity**: warning
+- **Triggers when**: Across all checks that report `details.rateLimited`,
+  the total rate-limited count exceeds 20% of the total tested count (taken
+  from `details.testedLinks`, else `details.pageResults.length`).
+- **Message**: {pct}% of tested URLs returned HTTP 429 (rate limited). Check
+  results may be unreliable because rate-limited requests are not retried
+  indefinitely.
+- **Resolution**: Increase --request-delay to slow down requests, or contact
+  the site operator to allowlist your IP or user-agent for testing.
+
 ---
 
 ## Score Display
diff --git a/src/checks/content-discoverability/llms-txt-links-resolve.ts b/src/checks/content-discoverability/llms-txt-links-resolve.ts
index fee728c..7fcb92f 100644
--- a/src/checks/content-discoverability/llms-txt-links-resolve.ts
+++ b/src/checks/content-discoverability/llms-txt-links-resolve.ts
@@ -152,6 +152,27 @@ async function checkLlmsTxtLinksResolve(ctx: CheckContext): Promise<CheckResult>
       ? ` (${crossBroken.length} external link${crossBroken.length === 1 ? '' : 's'} also failed; may be bot-detection or rate-limiting)`
       : '';
 
+  // Find the most frequent origin among cross-origin links (for diagnostics)
+  let dominantCrossOrigin: string | null = null;
+  if (crossOriginLinks.length > 0) {
+    const originCounts = new Map<string, number>();
+    for (const url of crossOriginLinks) {
+      try {
+        const o = new URL(url).origin;
+        originCounts.set(o, (originCounts.get(o) ?? 0) + 1);
+      } catch {
+        // skip unparseable
+      }
+    }
+    let maxCount = 0;
+    for (const [origin, count] of originCounts) {
+      if (count > maxCount) {
+        maxCount = count;
+        dominantCrossOrigin = origin;
+      }
+    }
+  }
+
   const details: Record<string, unknown> = {
     totalLinks,
     sameOrigin: {
@@ -171,6 +192,7 @@ async function checkLlmsTxtLinksResolve(ctx: CheckContext): Promise<CheckResult>
       broken: crossBroken.map((b) => ({ url: b.url, status: b.status, error: b.error })),
       fetchErrors: crossFetchErrors,
       rateLimited: crossRateLimited,
+      dominantOrigin: dominantCrossOrigin,
     },
     // Flat fields kept for backward compatibility
     testedLinks: sameResults.length + crossResults.length,
diff --git a/src/cli/formatters/scorecard.ts b/src/cli/formatters/scorecard.ts
index 385824f..2b77023 100644
--- a/src/cli/formatters/scorecard.ts
+++ b/src/cli/formatters/scorecard.ts
@@ -45,10 +45,13 @@ function formatLocalTime(iso: string): string {
   return d.toLocaleString();
 }
 
-function formatCategoryLine(name: string, score: number, grade: string): string {
+function formatCategoryLine(name: string, score: number | null, grade: string | null): string {
   const paddedName = name.padEnd(36);
+  if (score === null) {
+    return `    ${paddedName} ${chalk.dim('–'.padStart(9))} ${chalk.dim('(N/A)')}`;
+  }
   const scoreStr = `${score} / 100`;
-  const coloredGrade = gradeColor(grade)(`(${grade})`);
+  const coloredGrade = gradeColor(grade!)(`(${grade})`);
   return `    ${paddedName} ${scoreStr.padStart(9)} ${coloredGrade}`;
 }
 
diff --git a/src/index.ts b/src/index.ts
index 4209c75..06942b7 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -11,6 +11,7 @@ export type {
   AgentDocsConfig,
   DiscoveredFile,
   SizeThresholds,
+  SamplingStrategy,
   CuratedPageEntry,
   PageConfigEntry,
 } from './types.js';
@@ -32,4 +33,5 @@ export type {
   Diagnostic,
   DiagnosticSeverity,
   Grade,
+  ScoreDisplayMode,
 } from './scoring/types.js';
diff --git a/src/runner.ts b/src/runner.ts
index fd3b242..1745dec 100644
--- a/src/runner.ts
+++ b/src/runner.ts
@@ -154,6 +154,7 @@ export async function runChecks(
 
   const urlTags = ctx._sampledPages?.urlTags;
   const discoverySources = ctx._sampledPages?.sources;
+  const testedPages = ctx._sampledPages?.urls.length;
 
   return {
     url: baseUrl,
@@ -163,5 +164,7 @@ export async function runChecks(
     summary,
     ...(urlTags && { urlTags }),
     ...(discoverySources && { discoverySources }),
+    ...(testedPages !== undefined && { testedPages }),
+    samplingStrategy: ctx.options.samplingStrategy,
   };
 }
diff --git a/src/scoring/diagnostics.ts b/src/scoring/diagnostics.ts
index cf89375..77f0ddc 100644
--- a/src/scoring/diagnostics.ts
+++ b/src/scoring/diagnostics.ts
@@ -1,12 +1,20 @@
-import type { CheckResult } from '../types.js';
+import type { CheckResult, ReportResult } from '../types.js';
 import type { Diagnostic, DiagnosticSeverity } from './types.js';
 
 interface DiagnosticDefinition {
   id: string;
   severity: DiagnosticSeverity;
   /** Evaluated in dependency order. Can reference prior diagnostic results. */
-  triggers: (results: Map<string, CheckResult>, triggered: Set<string>) => boolean;
-  message: (results: Map<string, CheckResult>, triggered: Set<string>) => string;
+  triggers: (
+    results: Map<string, CheckResult>,
+    triggered: Set<string>,
+    report: ReportResult,
+  ) => boolean;
+  message: (
+    results: Map<string, CheckResult>,
+    triggered: Set<string>,
+    report: ReportResult,
+  ) => string;
   resolution: string;
 }
 
@@ -245,23 +253,157 @@ const DIAGNOSTIC_DEFINITIONS: DiagnosticDefinition[] = [
       'CSS/JS), or provide markdown versions and ensure agents can discover ' +
       'them via content negotiation or an llms.txt directive.',
   },
+
+  // --- run-level diagnostics (don't depend on other diagnostics) ---
+
+  {
+    id: 'single-page-sample',
+    severity: 'warning',
+    triggers: (_results, _triggered, report) => {
+      const isDiscoveryBased =
+        report.samplingStrategy === 'random' || report.samplingStrategy === 'deterministic';
+      return isDiscoveryBased && report.testedPages === 1;
+    },
+    message: () =>
+      'Only one page was discovered and tested. Page-level category scores ' +
+      '(page size, content structure, URL stability, etc.) are based on a ' +
+      'single page and may not represent the site. These categories are ' +
+      'marked as N/A in the score.',
+    resolution:
+      'If your site has an llms.txt, ensure it contains working links so ' +
+      'the tool can discover more pages. If testing a preview deployment, ' +
+      'use --canonical-origin to rewrite cross-origin llms.txt links. You ' +
+      'can also provide specific pages with --urls.',
+  },
+
+  {
+    id: 'cross-origin-llms-txt',
+    severity: 'warning',
+    triggers: (results) => {
+      const linkResolve = results.get('llms-txt-links-resolve');
+      if (!linkResolve || linkResolve.status === 'skip') return false;
+      const d = linkResolve.details;
+      if (!d) return false;
+      const sameOrigin = d.sameOrigin as { total?: number } | undefined;
+      const crossOrigin = d.crossOrigin as { total?: number } | undefined;
+      return (sameOrigin?.total ?? 0) === 0 && (crossOrigin?.total ?? 0) > 0;
+    },
+    message: (results) => {
+      const d = results.get('llms-txt-links-resolve')?.details;
+      const crossOrigin = d?.crossOrigin as { total?: number; dominantOrigin?: string } | undefined;
+      const total = crossOrigin?.total ?? 0;
+      const dominant = crossOrigin?.dominantOrigin ?? 'an external origin';
+      return (
+        `All ${total} links in your llms.txt point to ${dominant}, not ` +
+        'the origin being tested. This typically happens when testing a ' +
+        'preview or staging deployment whose llms.txt still references the ' +
+        'production domain. Page discovery falls back to a single page.'
+      );
+    },
+    resolution:
+      'Use --canonical-origin <production-origin> to rewrite cross-origin ' +
+      'links during testing. For example: --canonical-origin https://docs.example.com',
+  },
+
+  {
+    id: 'gzipped-sitemap-skipped',
+    severity: 'info',
+    triggers: (results) => {
+      for (const result of results.values()) {
+        const warnings = result.details?.discoveryWarnings as string[] | undefined;
+        if (warnings?.some((w) => w.includes('gzipped sitemap'))) return true;
+      }
+      return false;
+    },
+    message: (results) => {
+      const urls: string[] = [];
+      for (const result of results.values()) {
+        const warnings = result.details?.discoveryWarnings as string[] | undefined;
+        if (!warnings) continue;
+        for (const w of warnings) {
+          if (w.includes('gzipped sitemap')) {
+            const match = w.match(/:\s*(.+)$/);
+            if (match) urls.push(match[1]);
+          }
+        }
+      }
+      const urlNote = urls.length > 0 ? ` (${urls.join(', ')})` : '';
+      return (
+        `A gzipped sitemap was skipped during URL discovery${urlNote}. ` +
+        'If this is the only sitemap source, it may have reduced the number ' +
+        'of pages discovered for testing.'
+      );
+    },
+    resolution:
+      'Provide an uncompressed sitemap.xml alongside the gzipped version, ' +
+      'or supply specific pages via --urls for targeted testing.',
+  },
+
+  {
+    id: 'rate-limiting-severe',
+    severity: 'warning',
+    triggers: (results) => {
+      let totalTested = 0;
+      let totalRateLimited = 0;
+      for (const result of results.values()) {
+        const d = result.details;
+        if (!d) continue;
+        const rl = d.rateLimited as number | undefined;
+        if (rl === undefined) continue;
+
+        const pageResults = d.pageResults as unknown[] | undefined;
+        const testedLinks = d.testedLinks as number | undefined;
+        const tested = testedLinks ?? pageResults?.length ?? 0;
+
+        totalTested += tested;
+        totalRateLimited += rl;
+      }
+      return totalTested > 0 && totalRateLimited / totalTested > 0.2;
+    },
+    message: (results) => {
+      let totalTested = 0;
+      let totalRateLimited = 0;
+      for (const result of results.values()) {
+        const d = result.details;
+        if (!d) continue;
+        const rl = d.rateLimited as number | undefined;
+        if (rl === undefined) continue;
+        const pageResults = d.pageResults as unknown[] | undefined;
+        const testedLinks = d.testedLinks as number | undefined;
+        totalTested += testedLinks ?? pageResults?.length ?? 0;
+        totalRateLimited += rl;
+      }
+      const pct = totalTested > 0 ? Math.round((totalRateLimited / totalTested) * 100) : 0;
+      return (
+        `${pct}% of tested URLs returned HTTP 429 (rate limited). Check ` +
+        'results may be unreliable because rate-limited requests are not ' +
+        'retried indefinitely.'
+      );
+    },
+    resolution:
+      'Increase --request-delay to slow down requests, or contact the site ' +
+      'operator to allowlist your IP or user-agent for testing.',
+  },
 ];
 
 /**
  * Evaluate all interaction diagnostics against a set of check results.
  * Returns triggered diagnostics in evaluation order.
  */
-export function evaluateDiagnostics(results: Map<string, CheckResult>): Diagnostic[] {
+export function evaluateDiagnostics(
+  results: Map<string, CheckResult>,
+  report: ReportResult,
+): Diagnostic[] {
   const triggered = new Set<string>();
   const diagnostics: Diagnostic[] = [];
 
   for (const def of DIAGNOSTIC_DEFINITIONS) {
-    if (def.triggers(results, triggered)) {
+    if (def.triggers(results, triggered, report)) {
       triggered.add(def.id);
       diagnostics.push({
         id: def.id,
         severity: def.severity,
-        message: def.message(results, triggered),
+        message: def.message(results, triggered, report),
         resolution: def.resolution,
       });
     }
diff --git a/src/scoring/index.ts b/src/scoring/index.ts
index ce78052..0d4b00f 100644
--- a/src/scoring/index.ts
+++ b/src/scoring/index.ts
@@ -17,4 +17,5 @@ export type {
   Diagnostic,
   DiagnosticSeverity,
   Grade,
+  ScoreDisplayMode,
 } from './types.js';
diff --git a/src/scoring/score.ts b/src/scoring/score.ts
index 8d2c90e..29198d8 100644
--- a/src/scoring/score.ts
+++ b/src/scoring/score.ts
@@ -1,6 +1,6 @@
 import type { CheckResult, ReportResult } from '../types.js';
 import { CATEGORIES } from '../constants.js';
-import type { CheckScore, Grade, ScoreCap, ScoreResult } from './types.js';
+import type { CategoryScore, CheckScore, Grade, ScoreCap, ScoreResult } from './types.js';
 import { getCheckWeight } from './weights.js';
 import { getCheckProportion } from './proportions.js';
 import { getCoefficient } from './coefficients.js';
@@ -8,6 +8,25 @@ import { evaluateDiagnostics } from './diagnostics.js';
 import { getResolution } from './resolutions.js';
 import { computeTagScores } from './tag-scores.js';
 
+const PAGE_LEVEL_CHECKS = new Set([
+  'llms-txt-directive-html',
+  'llms-txt-directive-md',
+  'markdown-url-support',
+  'content-negotiation',
+  'markdown-code-fence-validity',
+  'page-size-markdown',
+  'page-size-html',
+  'markdown-content-parity',
+  'content-start-position',
+  'tabbed-content-serialization',
+  'section-header-quality',
+  'http-status-codes',
+  'redirect-behavior',
+  'rendering-strategy',
+  'auth-gate-detection',
+  'cache-header-hygiene',
+]);
+
 /**
  * Compute a score from a report result.
  *
@@ -20,6 +39,11 @@ export function computeScore(report: ReportResult): ScoreResult {
     resultMap.set(r.id, r);
   }
 
+  // Determine if page-level scores lack meaningful data
+  const isDiscoveryBased =
+    report.samplingStrategy === 'random' || report.samplingStrategy === 'deterministic';
+  const insufficientData = isDiscoveryBased && report.testedPages === 1;
+
   // Compute per-check scores
   const checkScores: Record<string, CheckScore> = {};
 
@@ -36,6 +60,7 @@ export function computeScore(report: ReportResult): ScoreResult {
     const coefficient = getCoefficient(result.id, resultMap);
     const effectiveWeight = weight.weight * coefficient;
     const earnedScore = proportionResult.proportion * effectiveWeight;
+    const isNotApplicable = insufficientData && PAGE_LEVEL_CHECKS.has(result.id);
 
     checkScores[result.id] = {
       baseWeight: weight.weight,
@@ -44,14 +69,16 @@ export function computeScore(report: ReportResult): ScoreResult {
       proportion: proportionResult.proportion,
       earnedScore,
       maxScore: effectiveWeight,
+      scoreDisplayMode: isNotApplicable ? 'notApplicable' : 'numeric',
     };
   }
 
-  // Overall score
+  // Overall score (exclude notApplicable checks)
   let totalEarned = 0;
   let totalMax = 0;
 
   for (const cs of Object.values(checkScores)) {
+    if (cs.scoreDisplayMode === 'notApplicable') continue;
     totalEarned += cs.earnedScore;
     totalMax += cs.maxScore;
   }
@@ -59,7 +86,7 @@ export function computeScore(report: ReportResult): ScoreResult {
   const rawScore = totalMax > 0 ? (totalEarned / totalMax) * 100 : 0;
 
   // Diagnostics (evaluated before caps so no-viable-path can trigger a cap)
-  const diagnostics = evaluateDiagnostics(resultMap);
+  const diagnostics = evaluateDiagnostics(resultMap, report);
   const triggeredDiagnostics = new Set(diagnostics.map((d) => d.id));
 
   // Apply critical check caps
@@ -67,24 +94,33 @@ export function computeScore(report: ReportResult): ScoreResult {
   const overall = Math.round(cap ? Math.min(rawScore, cap.cap) : rawScore);
 
   // Category scores
-  const categoryScores: Record<string, { score: number; grade: Grade }> = {};
+  const categoryScores: Record<string, CategoryScore> = {};
 
   for (const cat of CATEGORIES) {
     let catEarned = 0;
     let catMax = 0;
+    let hasNumericCheck = false;
+    let hasScoredCheck = false;
 
     for (const result of report.results) {
       if (result.category !== cat.id) continue;
       const cs = checkScores[result.id];
       if (!cs) continue;
+      hasScoredCheck = true;
+      if (cs.scoreDisplayMode === 'notApplicable') continue;
+      hasNumericCheck = true;
       catEarned += cs.earnedScore;
       catMax += cs.maxScore;
     }
 
-    categoryScores[cat.id] = {
-      score: catMax > 0 ? Math.round((catEarned / catMax) * 100) : 0,
-      grade: toGrade(catMax > 0 ? Math.round((catEarned / catMax) * 100) : 0),
-    };
+    if (hasScoredCheck && !hasNumericCheck) {
+      categoryScores[cat.id] = { score: null, grade: null };
+    } else {
+      categoryScores[cat.id] = {
+        score: catMax > 0 ? Math.round((catEarned / catMax) * 100) : 0,
+        grade: toGrade(catMax > 0 ? Math.round((catEarned / catMax) * 100) : 0),
+      };
+    }
   }
 
   // Resolutions
@@ -139,9 +175,10 @@ function computeCap(
   }
 
   // Multi-page critical checks: rendering-strategy, auth-gate-detection
+  // Skip N/A checks — insufficient data to justify a cap.
   for (const checkId of ['rendering-strategy', 'auth-gate-detection']) {
     const cs = checkScores[checkId];
-    if (!cs) continue;
+    if (!cs || cs.scoreDisplayMode === 'notApplicable') continue;
 
     if (cs.proportion <= 0.25) {
       caps.push({
diff --git a/src/scoring/types.ts b/src/scoring/types.ts
index 2212b05..a26f8ac 100644
--- a/src/scoring/types.ts
+++ b/src/scoring/types.ts
@@ -2,6 +2,8 @@ export type Grade = 'A+' | 'A' | 'B' | 'C' | 'D' | 'F';
 
 export type DiagnosticSeverity = 'critical' | 'warning' | 'info';
 
+export type ScoreDisplayMode = 'numeric' | 'notApplicable';
+
 export interface CheckScore {
   /** Base weight from the tier assignment. */
   baseWeight: number;
@@ -15,11 +17,13 @@ export interface CheckScore {
   earnedScore: number;
   /** effectiveWeight (the maximum this check could earn). */
   maxScore: number;
+  /** Whether this score is meaningful. 'notApplicable' when insufficient data. */
+  scoreDisplayMode: ScoreDisplayMode;
 }
 
 export interface CategoryScore {
-  score: number;
-  grade: Grade;
+  score: number | null;
+  grade: Grade | null;
 }
 
 export interface TagCheckBreakdown {
diff --git a/src/types.ts b/src/types.ts
index 792517f..9805460 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -176,6 +176,10 @@ export interface ReportResult {
   urlTags?: Record<string, string>;
   /** Which discovery methods contributed to the page URL set. */
   discoverySources?: DiscoverySource[];
+  /** Number of pages tested by page-level checks. */
+  testedPages?: number;
+  /** The sampling strategy used for this run. */
+  samplingStrategy?: SamplingStrategy;
 }
 
 export interface AgentDocsConfig {
diff --git a/test/unit/cli/scorecard-formatter.test.ts b/test/unit/cli/scorecard-formatter.test.ts
index 311fd1b..ba1f199 100644
--- a/test/unit/cli/scorecard-formatter.test.ts
+++ b/test/unit/cli/scorecard-formatter.test.ts
@@ -48,6 +48,7 @@ function makeScoreResult(overrides?: Partial<ScoreResult>): ScoreResult {
         proportion: 1,
         earnedScore: 10,
         maxScore: 10,
+        scoreDisplayMode: 'numeric',
       },
       'llms-txt-valid': {
         baseWeight: 4,
@@ -56,6 +57,7 @@ function makeScoreResult(overrides?: Partial<ScoreResult>): ScoreResult {
         proportion: 0.75,
         earnedScore: 3,
         maxScore: 4,
+        scoreDisplayMode: 'numeric',
       },
       'markdown-url-support': {
         baseWeight: 7,
@@ -64,6 +66,7 @@ function makeScoreResult(overrides?: Partial<ScoreResult>): ScoreResult {
         proportion: 0,
         earnedScore: 0,
         maxScore: 7,
+        scoreDisplayMode: 'numeric',
       },
     },
     diagnostics: [],
@@ -412,4 +415,56 @@ describe('formatScorecard', () => {
     const output = formatScorecard(makeReport(), makeScoreResult());
     expect(output).not.toContain('Tag Scores:');
   });
+
+  it('renders null category scores as N/A dash', () => {
+    const score = makeScoreResult({
+      categoryScores: {
+        'content-discoverability': { score: 80, grade: 'B' },
+        'markdown-availability': { score: null, grade: null },
+        'page-size': { score: null, grade: null },
+        'url-stability': { score: null, grade: null },
+      },
+    });
+    const output = formatScorecard(makeReport(), score);
+    expect(output).toContain('Content Discoverability');
+    expect(output).toContain('80 / 100');
+    expect(output).toContain('(N/A)');
+    // Should not show a numeric score for null categories
+    expect(output).not.toContain('null / 100');
+  });
+
+  it('renders N/A scorecard end-to-end from single-page discovery report', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'llms-txt-exists',
+          category: 'content-discoverability',
+          status: 'fail',
+          message: 'No llms.txt found',
+        },
+        {
+          id: 'page-size-html',
+          category: 'page-size',
+          status: 'pass',
+          message: '1 page tested, all pass',
+        },
+        {
+          id: 'http-status-codes',
+          category: 'url-stability',
+          status: 'pass',
+          message: '1 page tested',
+        },
+      ],
+      summary: { total: 3, pass: 2, warn: 0, fail: 1, skip: 0, error: 0 },
+      testedPages: 1,
+      samplingStrategy: 'random',
+    });
+    // Let computeScore handle it (no pre-built scoreResult)
+    const output = formatScorecard(report);
+    // Page-level categories should show as N/A
+    expect(output).toContain('(N/A)');
+    // Should fire the single-page-sample diagnostic
+    expect(output).toContain('Interaction Diagnostics');
+    expect(output).toContain('Only one page was discovered');
+  });
 });
diff --git a/test/unit/scoring/diagnostics.test.ts b/test/unit/scoring/diagnostics.test.ts
index 5253da1..5791267 100644
--- a/test/unit/scoring/diagnostics.test.ts
+++ b/test/unit/scoring/diagnostics.test.ts
@@ -1,6 +1,6 @@
 import { describe, it, expect } from 'vitest';
 import { evaluateDiagnostics } from '../../../src/scoring/diagnostics.js';
-import type { CheckResult } from '../../../src/types.js';
+import type { CheckResult, ReportResult } from '../../../src/types.js';
 
 function r(
   id: string,
@@ -14,6 +14,18 @@ function resultsMap(...results: CheckResult[]): Map<string, CheckResult> {
   return new Map(results.map((res) => [res.id, res]));
 }
 
+function defaultReport(): ReportResult {
+  return {
+    url: 'https://example.com',
+    timestamp: new Date().toISOString(),
+    specUrl: 'https://agentdocsspec.com/spec/',
+    results: [],
+    summary: { total: 0, pass: 0, warn: 0, fail: 0, skip: 0, error: 0 },
+    samplingStrategy: 'random',
+    testedPages: 10,
+  };
+}
+
 describe('diagnostics', () => {
   describe('markdown-undiscoverable', () => {
     it('triggers when markdown supported but no directive and no content negotiation', () => {
@@ -22,7 +34,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'fail'),
         r('llms-txt-directive-html', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeDefined();
     });
 
@@ -32,7 +44,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'fail'),
         r('llms-txt-directive-html', 'pass'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeUndefined();
     });
 
@@ -42,7 +54,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'pass'),
         r('llms-txt-directive-html', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeUndefined();
       expect(diags.find((d) => d.id === 'markdown-partially-discoverable')).toBeDefined();
     });
@@ -53,7 +65,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'fail'),
         r('llms-txt-directive-html', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeUndefined();
     });
   });
@@ -65,7 +77,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'pass'),
         r('llms-txt-directive-html', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       const diag = diags.find((d) => d.id === 'markdown-partially-discoverable');
       expect(diag).toBeDefined();
       expect(diag!.severity).toBe('warning');
@@ -77,7 +89,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'pass'),
         r('llms-txt-directive-html', 'pass'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'markdown-partially-discoverable')).toBeUndefined();
     });
 
@@ -87,7 +99,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'fail'),
         r('llms-txt-directive-html', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'markdown-partially-discoverable')).toBeUndefined();
       expect(diags.find((d) => d.id === 'markdown-undiscoverable')).toBeDefined();
     });
@@ -101,7 +113,7 @@ describe('diagnostics', () => {
           sizes: [{ characters: 250_000 }],
         }),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       const diag = diags.find((d) => d.id === 'truncated-index');
       expect(diag).toBeDefined();
       expect(diag!.message).toContain('250,000');
@@ -110,7 +122,7 @@ describe('diagnostics', () => {
 
     it('does not trigger when llms.txt is absent', () => {
       const results = resultsMap(r('llms-txt-exists', 'fail'), r('llms-txt-size', 'fail'));
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'truncated-index')).toBeUndefined();
     });
   });
@@ -124,7 +136,7 @@ describe('diagnostics', () => {
           spaShells: 15,
         }),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'spa-shell-html-invalid')).toBeDefined();
     });
 
@@ -136,7 +148,7 @@ describe('diagnostics', () => {
           spaShells: 0,
         }),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'spa-shell-html-invalid')).toBeUndefined();
     });
 
@@ -149,7 +161,7 @@ describe('diagnostics', () => {
         }),
         r('markdown-url-support', 'pass'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       const diag = diags.find((d) => d.id === 'spa-shell-html-invalid');
       expect(diag!.message).toContain('markdown path still works');
     });
@@ -170,7 +182,7 @@ describe('diagnostics', () => {
         r('llms-txt-directive-md', 'fail'),
         r('llms-txt-links-markdown', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'no-viable-path')).toBeDefined();
       expect(diags.find((d) => d.id === 'no-viable-path')!.severity).toBe('critical');
     });
@@ -181,7 +193,7 @@ describe('diagnostics', () => {
         r('rendering-strategy', 'fail'),
         r('markdown-url-support', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'no-viable-path')).toBeDefined();
     });
 
@@ -192,7 +204,7 @@ describe('diagnostics', () => {
         r('rendering-strategy', 'fail'),
         r('markdown-url-support', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'no-viable-path')).toBeUndefined();
     });
 
@@ -207,7 +219,7 @@ describe('diagnostics', () => {
         }),
         r('markdown-url-support', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       const diag = diags.find((d) => d.id === 'no-viable-path');
       expect(diag).toBeDefined();
       expect(diag!.message).toContain('0% of links resolve');
@@ -225,7 +237,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'pass'),
         r('llms-txt-directive-html', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'markdown-partially-discoverable')).toBeDefined();
       expect(diags.find((d) => d.id === 'no-viable-path')).toBeDefined();
     });
@@ -237,7 +249,7 @@ describe('diagnostics', () => {
         r('rendering-strategy', 'fail'),
         r('markdown-url-support', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'no-viable-path')).toBeUndefined();
     });
   });
@@ -248,7 +260,7 @@ describe('diagnostics', () => {
         r('auth-gate-detection', 'fail'),
         r('auth-alternative-access', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       const diag = diags.find((d) => d.id === 'auth-no-alternative');
       expect(diag).toBeDefined();
       expect(diag!.severity).toBe('critical');
@@ -259,7 +271,7 @@ describe('diagnostics', () => {
         r('auth-gate-detection', 'fail'),
         r('auth-alternative-access', 'pass'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'auth-no-alternative')).toBeUndefined();
     });
   });
@@ -270,7 +282,7 @@ describe('diagnostics', () => {
         r('page-size-html', 'fail', { failBucket: 12 }),
         r('markdown-url-support', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       const diag = diags.find((d) => d.id === 'page-size-no-markdown-escape');
       expect(diag).toBeDefined();
       expect(diag!.message).toContain('12 pages');
@@ -285,7 +297,7 @@ describe('diagnostics', () => {
         r('llms-txt-directive-md', 'fail'),
         r('llms-txt-links-markdown', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'page-size-no-markdown-escape')).toBeDefined();
     });
 
@@ -296,7 +308,7 @@ describe('diagnostics', () => {
         r('content-negotiation', 'pass'),
         r('llms-txt-directive-html', 'fail'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'page-size-no-markdown-escape')).toBeDefined();
     });
 
@@ -306,8 +318,181 @@ describe('diagnostics', () => {
         r('markdown-url-support', 'pass'),
         r('llms-txt-directive-html', 'pass'),
       );
-      const diags = evaluateDiagnostics(results);
+      const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'page-size-no-markdown-escape')).toBeUndefined();
     });
   });
+
+  // --- New diagnostics ---
+
+  describe('single-page-sample', () => {
+    it('triggers when testedPages is 1 and strategy is discovery-based', () => {
+      const report = { ...defaultReport(), testedPages: 1, samplingStrategy: 'random' as const };
+      const diags = evaluateDiagnostics(resultsMap(), report);
+      const diag = diags.find((d) => d.id === 'single-page-sample');
+      expect(diag).toBeDefined();
+      expect(diag!.severity).toBe('warning');
+    });
+
+    it('triggers with deterministic sampling', () => {
+      const report = {
+        ...defaultReport(),
+        testedPages: 1,
+        samplingStrategy: 'deterministic' as const,
+      };
+      const diags = evaluateDiagnostics(resultsMap(), report);
+      expect(diags.find((d) => d.id === 'single-page-sample')).toBeDefined();
+    });
+
+    it('does not trigger with curated sampling', () => {
+      const report = { ...defaultReport(), testedPages: 1, samplingStrategy: 'curated' as const };
+      const diags = evaluateDiagnostics(resultsMap(), report);
+      expect(diags.find((d) => d.id === 'single-page-sample')).toBeUndefined();
+    });
+
+    it('does not trigger with none sampling', () => {
+      const report = { ...defaultReport(), testedPages: 1, samplingStrategy: 'none' as const };
+      const diags = evaluateDiagnostics(resultsMap(), report);
+      expect(diags.find((d) => d.id === 'single-page-sample')).toBeUndefined();
+    });
+
+    it('does not trigger when testedPages > 1', () => {
+      const report = { ...defaultReport(), testedPages: 5, samplingStrategy: 'random' as const };
+      const diags = evaluateDiagnostics(resultsMap(), report);
+      expect(diags.find((d) => d.id === 'single-page-sample')).toBeUndefined();
+    });
+
+    it('does not trigger when testedPages is undefined', () => {
+      const report = { ...defaultReport(), testedPages: undefined };
+      const diags = evaluateDiagnostics(resultsMap(), report);
+      expect(diags.find((d) => d.id === 'single-page-sample')).toBeUndefined();
+    });
+  });
+
+  describe('cross-origin-llms-txt', () => {
+    it('triggers when all links are cross-origin', () => {
+      const results = resultsMap(
+        r('llms-txt-links-resolve', 'warn', {
+          sameOrigin: { total: 0 },
+          crossOrigin: { total: 15, dominantOrigin: 'https://docs.example.com' },
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'cross-origin-llms-txt');
+      expect(diag).toBeDefined();
+      expect(diag!.severity).toBe('warning');
+      expect(diag!.message).toContain('15 links');
+      expect(diag!.message).toContain('https://docs.example.com');
+    });
+
+    it('does not trigger when there are same-origin links', () => {
+      const results = resultsMap(
+        r('llms-txt-links-resolve', 'pass', {
+          sameOrigin: { total: 10 },
+          crossOrigin: { total: 5 },
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      expect(diags.find((d) => d.id === 'cross-origin-llms-txt')).toBeUndefined();
+    });
+
+    it('does not trigger when check is skipped', () => {
+      const results = resultsMap(r('llms-txt-links-resolve', 'skip'));
+      const diags = evaluateDiagnostics(results, defaultReport());
+      expect(diags.find((d) => d.id === 'cross-origin-llms-txt')).toBeUndefined();
+    });
+
+    it('does not trigger when there are no cross-origin links', () => {
+      const results = resultsMap(
+        r('llms-txt-links-resolve', 'pass', {
+          sameOrigin: { total: 0 },
+          crossOrigin: { total: 0 },
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      expect(diags.find((d) => d.id === 'cross-origin-llms-txt')).toBeUndefined();
+    });
+  });
+
+  describe('gzipped-sitemap-skipped', () => {
+    it('triggers when a check has a gzipped sitemap warning', () => {
+      const results = resultsMap(
+        r('page-size-html', 'pass', {
+          discoveryWarnings: [
+            'Skipped gzipped sitemap (not supported): https://example.com/sitemap.xml.gz',
+          ],
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'gzipped-sitemap-skipped');
+      expect(diag).toBeDefined();
+      expect(diag!.severity).toBe('info');
+      expect(diag!.message).toContain('sitemap.xml.gz');
+    });
+
+    it('does not trigger without gzipped sitemap warnings', () => {
+      const results = resultsMap(
+        r('page-size-html', 'pass', {
+          discoveryWarnings: ['Some other warning'],
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      expect(diags.find((d) => d.id === 'gzipped-sitemap-skipped')).toBeUndefined();
+    });
+
+    it('does not trigger with no discovery warnings', () => {
+      const results = resultsMap(r('page-size-html', 'pass'));
+      const diags = evaluateDiagnostics(results, defaultReport());
+      expect(diags.find((d) => d.id === 'gzipped-sitemap-skipped')).toBeUndefined();
+    });
+  });
+
+  describe('rate-limiting-severe', () => {
+    it('triggers when >20% of requests are rate-limited', () => {
+      const results = resultsMap(
+        r('llms-txt-links-resolve', 'warn', {
+          testedLinks: 50,
+          rateLimited: 15,
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'rate-limiting-severe');
+      expect(diag).toBeDefined();
+      expect(diag!.severity).toBe('warning');
+      expect(diag!.message).toContain('30%');
+    });
+
+    it('does not trigger when rate limiting is below threshold', () => {
+      const results = resultsMap(
+        r('llms-txt-links-resolve', 'pass', {
+          testedLinks: 50,
+          rateLimited: 5,
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      expect(diags.find((d) => d.id === 'rate-limiting-severe')).toBeUndefined();
+    });
+
+    it('aggregates rate limiting across multiple checks', () => {
+      const results = resultsMap(
+        r('llms-txt-links-resolve', 'pass', {
+          testedLinks: 50,
+          rateLimited: 5,
+        }),
+        r('markdown-url-support', 'warn', {
+          pageResults: Array.from({ length: 10 }, () => ({})),
+          rateLimited: 8,
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      // 13/60 = 21.7% -> triggers
+      expect(diags.find((d) => d.id === 'rate-limiting-severe')).toBeDefined();
+    });
+
+    it('does not trigger when no checks have rate limiting data', () => {
+      const results = resultsMap(r('page-size-html', 'pass'));
+      const diags = evaluateDiagnostics(results, defaultReport());
+      expect(diags.find((d) => d.id === 'rate-limiting-severe')).toBeUndefined();
+    });
+  });
 });
diff --git a/test/unit/scoring/score.test.ts b/test/unit/scoring/score.test.ts
index d1de347..243252c 100644
--- a/test/unit/scoring/score.test.ts
+++ b/test/unit/scoring/score.test.ts
@@ -1,6 +1,6 @@
 import { describe, it, expect } from 'vitest';
 import { computeScore, toGrade } from '../../../src/scoring/score.js';
-import type { CheckResult, ReportResult } from '../../../src/types.js';
+import type { CheckResult, ReportResult, SamplingStrategy } from '../../../src/types.js';
 
 function makeResult(
   id: string,
@@ -11,7 +11,10 @@ function makeResult(
   return { id, category, status, message: `${id}: ${status}`, details };
 }
 
-function makeReport(results: CheckResult[]): ReportResult {
+function makeReport(
+  results: CheckResult[],
+  overrides?: { testedPages?: number; samplingStrategy?: SamplingStrategy },
+): ReportResult {
   const summary = { total: results.length, pass: 0, warn: 0, fail: 0, skip: 0, error: 0 };
   for (const r of results) {
     summary[r.status]++;
@@ -22,6 +25,7 @@ function makeReport(results: CheckResult[]): ReportResult {
     specUrl: 'https://agentdocsspec.com/spec/',
     results,
     summary,
+    ...overrides,
   };
 }
 
@@ -452,4 +456,131 @@ describe('computeScore', () => {
       expect(score.tagScores).toBeUndefined();
     });
   });
+
+  describe('single-page scoring (scoreDisplayMode)', () => {
+    it('marks page-level checks as notApplicable when testedPages=1 and discovery-based', () => {
+      const results: CheckResult[] = [
+        makeResult('llms-txt-exists', 'content-discoverability', 'pass'),
+        makeResult('page-size-html', 'page-size', 'pass'),
+        makeResult('rendering-strategy', 'page-size', 'pass', {
+          serverRendered: 1,
+          sparseContent: 0,
+          spaShells: 0,
+        }),
+      ];
+      const score = computeScore(
+        makeReport(results, { testedPages: 1, samplingStrategy: 'random' }),
+      );
+      expect(score.checkScores['llms-txt-exists'].scoreDisplayMode).toBe('numeric');
+      expect(score.checkScores['page-size-html'].scoreDisplayMode).toBe('notApplicable');
+      expect(score.checkScores['rendering-strategy'].scoreDisplayMode).toBe('notApplicable');
+    });
+
+    it('does not mark checks as notApplicable with curated sampling', () => {
+      const results: CheckResult[] = [makeResult('page-size-html', 'page-size', 'pass')];
+      const score = computeScore(
+        makeReport(results, { testedPages: 1, samplingStrategy: 'curated' }),
+      );
+      expect(score.checkScores['page-size-html'].scoreDisplayMode).toBe('numeric');
+    });
+
+    it('does not mark checks as notApplicable when testedPages > 1', () => {
+      const results: CheckResult[] = [makeResult('page-size-html', 'page-size', 'pass')];
+      const score = computeScore(
+        makeReport(results, { testedPages: 5, samplingStrategy: 'random' }),
+      );
+      expect(score.checkScores['page-size-html'].scoreDisplayMode).toBe('numeric');
+    });
+
+    it('nulls category scores when all checks in category are notApplicable', () => {
+      const results: CheckResult[] = [
+        makeResult('llms-txt-exists', 'content-discoverability', 'pass'),
+        makeResult('http-status-codes', 'url-stability', 'pass'),
+        makeResult('redirect-behavior', 'url-stability', 'pass'),
+      ];
+      const score = computeScore(
+        makeReport(results, { testedPages: 1, samplingStrategy: 'random' }),
+      );
+      // url-stability has only page-level checks -> null
+      expect(score.categoryScores['url-stability'].score).toBeNull();
+      expect(score.categoryScores['url-stability'].grade).toBeNull();
+      // content-discoverability has site-level check -> numeric
+      expect(score.categoryScores['content-discoverability'].score).toBe(100);
+    });
+
+    it('computes mixed category scores from numeric checks only', () => {
+      const results: CheckResult[] = [
+        makeResult('llms-txt-coverage', 'observability', 'pass'),
+        makeResult('cache-header-hygiene', 'observability', 'pass'),
+      ];
+      const score = computeScore(
+        makeReport(results, { testedPages: 1, samplingStrategy: 'random' }),
+      );
+      // observability: llms-txt-coverage is site-level (numeric), cache-header-hygiene is page-level (N/A)
+      // score computed from only llms-txt-coverage
+      expect(score.categoryScores['observability'].score).toBe(100);
+    });
+
+    it('excludes notApplicable checks from overall score', () => {
+      const results: CheckResult[] = [
+        makeResult('llms-txt-exists', 'content-discoverability', 'pass'),
+        makeResult('page-size-html', 'page-size', 'fail', {
+          passBucket: 0,
+          warnBucket: 0,
+          failBucket: 1,
+        }),
+      ];
+      // With N/A: only llms-txt-exists counts (pass) -> 100
+      const scoreNA = computeScore(
+        makeReport(results, { testedPages: 1, samplingStrategy: 'random' }),
+      );
+      // Without N/A: both count, page-size-html fails -> less than 100
+      const scoreNormal = computeScore(
+        makeReport(results, { testedPages: 5, samplingStrategy: 'random' }),
+      );
+      expect(scoreNA.overall).toBe(100);
+      expect(scoreNormal.overall).toBeLessThan(100);
+    });
+
+    it('fires single-page-sample diagnostic when N/A condition met', () => {
+      const results: CheckResult[] = [
+        makeResult('llms-txt-exists', 'content-discoverability', 'fail'),
+        makeResult('page-size-html', 'page-size', 'pass'),
+      ];
+      const score = computeScore(
+        makeReport(results, { testedPages: 1, samplingStrategy: 'random' }),
+      );
+      expect(score.diagnostics.find((d) => d.id === 'single-page-sample')).toBeDefined();
+    });
+
+    it('all checks get numeric scoreDisplayMode by default (no sampling info)', () => {
+      const results: CheckResult[] = [makeResult('page-size-html', 'page-size', 'pass')];
+      const score = computeScore(makeReport(results));
+      expect(score.checkScores['page-size-html'].scoreDisplayMode).toBe('numeric');
+    });
+
+    it('does not apply rendering-strategy cap when check is notApplicable', () => {
+      const results: CheckResult[] = [
+        makeResult('llms-txt-exists', 'content-discoverability', 'pass'),
+        makeResult('rendering-strategy', 'page-size', 'fail', {
+          serverRendered: 0,
+          sparseContent: 0,
+          spaShells: 1,
+        }),
+      ];
+      // With N/A: rendering-strategy is notApplicable, cap should NOT fire
+      const scoreNA = computeScore(
+        makeReport(results, { testedPages: 1, samplingStrategy: 'random' }),
+      );
+      expect(scoreNA.checkScores['rendering-strategy'].scoreDisplayMode).toBe('notApplicable');
+      expect(scoreNA.cap).toBeUndefined();
+
+      // Without N/A: same data, cap SHOULD fire
+      const scoreNormal = computeScore(
+        makeReport(results, { testedPages: 10, samplingStrategy: 'random' }),
+      );
+      expect(scoreNormal.cap).toBeDefined();
+      expect(scoreNormal.cap!.cap).toBe(39);
+    });
+  });
 });

From 3dd8e50d29c30f8458e989badc5dff7cf717823d Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sat, 25 Apr 2026 17:13:41 -0400
Subject: [PATCH 06/13] Raise the tested pages floor to 5 for discoverability
 variance

---
 SCORING.md                                |  6 +++---
 docs/agent-score-calculation.md           |  4 ++--
 docs/interaction-diagnostics.md           |  4 ++--
 docs/reference/scoring-api.md             |  2 +-
 scoring-reference.md                      | 10 +++++-----
 src/constants.ts                          |  3 +++
 src/scoring/diagnostics.ts                | 22 ++++++++++++++++------
 src/scoring/score.ts                      |  7 +++++--
 test/unit/cli/scorecard-formatter.test.ts |  2 +-
 test/unit/scoring/diagnostics.test.ts     | 11 ++++++++++-
 test/unit/scoring/score.test.ts           |  4 ++--
 11 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/SCORING.md b/SCORING.md
index 1fa67b5..f9523b5 100644
--- a/SCORING.md
+++ b/SCORING.md
@@ -125,7 +125,7 @@ Rounded to the nearest integer. Checks marked as `notApplicable` (see below) are
 
 ### Insufficient-data handling (scoreDisplayMode)
 
-When the tool discovers only a single page using automatic discovery (`random` or `deterministic` sampling), page-level check scores are unreliable because they represent one page out of potentially thousands. In this case:
+When automatic discovery (`random` or `deterministic` sampling) finds fewer than 5 pages, page-level check scores are unreliable because they represent a handful of pages out of potentially thousands. In this case:
 
 - **Page-level checks** get `scoreDisplayMode: "notApplicable"` and are excluded from the overall score calculation.
 - **Site-level checks** (llms.txt checks, coverage, auth-alternative-access) remain `scoreDisplayMode: "numeric"` and are scored normally.
@@ -237,9 +237,9 @@ Some problems only become visible when you look at multiple checks together. The
 
 ### Single-page sample
 
-**Triggers when** automatic discovery (`random` or `deterministic` sampling) found only one page to test.
+**Triggers when** automatic discovery (`random` or `deterministic` sampling) found fewer than 5 pages to test.
 
-**What it means**: Page-level category scores (page size, content structure, URL stability, etc.) are based on a single page and may not represent the site. These categories are marked as N/A in the score.
+**What it means**: Page-level category scores (page size, content structure, URL stability, etc.) are based on too few pages to be representative. These categories are marked as N/A in the score.
 
 **What to do**: If your site has an llms.txt, ensure it contains working links so the tool can discover more pages. If testing a preview deployment, use `--canonical-origin` to rewrite cross-origin llms.txt links. You can also provide specific pages with `--urls`.
 
diff --git a/docs/agent-score-calculation.md b/docs/agent-score-calculation.md
index a9cb647..5ec86a1 100644
--- a/docs/agent-score-calculation.md
+++ b/docs/agent-score-calculation.md
@@ -162,7 +162,7 @@ The `rendering-strategy` and `auth-gate-detection` caps do not apply when the ch
 
 ## Insufficient data
 
-When automatic page discovery finds only a single page (using `random` or `deterministic` sampling), page-level check scores are unreliable because they represent one page out of potentially thousands. In this case:
+When automatic page discovery finds fewer than 5 pages (using `random` or `deterministic` sampling), page-level check scores are unreliable because they represent a handful of pages out of potentially thousands. In this case:
 
 - **Page-level checks** (those that test sampled pages like `page-size-html`, `rendering-strategy`, `http-status-codes`, etc.) are marked as "not applicable" and excluded from the score.
 - **Site-level checks** (llms.txt checks, coverage, auth-alternative-access) are scored normally.
@@ -170,7 +170,7 @@ When automatic page discovery finds only a single page (using `random` or `deter
 
 This typically happens when a site has no llms.txt or its llms.txt links point to a different origin (common with preview deployments). A [`single-page-sample` diagnostic](/interaction-diagnostics#single-page-sample) fires to explain the situation.
 
-This behavior does not apply when you explicitly choose pages with `--urls` or `--sampling curated`, or when you use `--sampling none`. If you intentionally test a single page, the score reflects that page.
+This behavior does not apply when you explicitly choose pages with `--urls` or `--sampling curated`, or when you use `--sampling none`. If you intentionally select pages, the score reflects those pages regardless of count.
 
 ## Cluster coefficients
 
diff --git a/docs/interaction-diagnostics.md b/docs/interaction-diagnostics.md
index 432a784..929a339 100644
--- a/docs/interaction-diagnostics.md
+++ b/docs/interaction-diagnostics.md
@@ -76,9 +76,9 @@ These diagnostics appear in the "Interaction Diagnostics" section of the `--form
 
 ## Single-page sample
 
-**Triggers when** automatic page discovery (`random` or `deterministic` sampling) found only one page to test.
+**Triggers when** automatic page discovery (`random` or `deterministic` sampling) found fewer than 5 pages to test.
 
-**What it means**: Page-level category scores (Page Size, Content Structure, URL Stability, etc.) are based on a single page and may not represent the site. These categories are marked as N/A in the score rather than showing potentially misleading numbers.
+**What it means**: Page-level category scores (Page Size, Content Structure, URL Stability, etc.) are based on too few pages to be representative. These categories are marked as N/A in the score rather than showing potentially misleading numbers.
 
 **What to do**: If your site has an llms.txt, ensure it contains working links so the tool can discover more pages. If testing a preview deployment, use `--canonical-origin` to rewrite cross-origin llms.txt links. You can also provide specific pages with `--urls` to test exactly the pages you care about.
 
diff --git a/docs/reference/scoring-api.md b/docs/reference/scoring-api.md
index bbaa142..f5ad256 100644
--- a/docs/reference/scoring-api.md
+++ b/docs/reference/scoring-api.md
@@ -46,7 +46,7 @@ This is the same function; the subpath is provided for consumers who want a narr
 | `resolutions`    | `Record<string, string>`        | Fix suggestions keyed by check ID                                                                                                     |
 | `tagScores`      | `Record<string, TagScore>`      | Per-tag aggregate scores (present when curated pages have tags)                                                                       |
 
-Each `CheckScore` includes a `scoreDisplayMode` field (`"numeric"` or `"notApplicable"`). When automatic page discovery finds only one page, page-level checks are marked `"notApplicable"` and excluded from overall and category score calculations. See [Insufficient data](/agent-score-calculation#insufficient-data) for details.
+Each `CheckScore` includes a `scoreDisplayMode` field (`"numeric"` or `"notApplicable"`). When automatic page discovery finds fewer than 5 pages, page-level checks are marked `"notApplicable"` and excluded from overall and category score calculations. See [Insufficient data](/agent-score-calculation#insufficient-data) for details.
 
 ## TagScore
 
diff --git a/scoring-reference.md b/scoring-reference.md
index 07c376b..ec11164 100644
--- a/scoring-reference.md
+++ b/scoring-reference.md
@@ -211,7 +211,7 @@ Each `CheckScore` has a `scoreDisplayMode` field:
 The `notApplicable` mode triggers when all of:
 
 - `samplingStrategy` is `random` or `deterministic` (discovery-based).
-- `testedPages` equals 1.
+- `testedPages` is less than `MIN_PAGES_FOR_SCORING` (currently 5).
 - The check is page-level (tests sampled pages, not site-level resources).
 
 Page-level checks: `llms-txt-directive-html`, `llms-txt-directive-md`,
@@ -555,10 +555,10 @@ in dependency order: `markdown-undiscoverable` and
 
 - **Severity**: warning
 - **Triggers when**: `samplingStrategy` is `random` or `deterministic` AND
-  `testedPages` equals 1.
-- **Message**: Only one page was discovered and tested. Page-level category
-  scores are based on a single page and may not represent the site. These
-  categories are marked as N/A in the score.
+  `testedPages` is less than `MIN_PAGES_FOR_SCORING` (currently 5).
+- **Message**: Only {n} page was / pages were discovered and tested (minimum 5
+  needed for reliable scoring). Page-level category scores may not represent
+  the site. These categories are marked as N/A in the score.
 - **Resolution**: If your site has an llms.txt, ensure it contains working
   links so the tool can discover more pages. If testing a preview deployment,
   use --canonical-origin to rewrite cross-origin llms.txt links. You can also
diff --git a/src/constants.ts b/src/constants.ts
index 670e1a9..1108489 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -40,5 +40,8 @@ export const DEFAULT_COVERAGE_PASS_THRESHOLD = 95;
 /** Default llms-txt-coverage warn threshold (percentage). */
 export const DEFAULT_COVERAGE_WARN_THRESHOLD = 80;
 
+/** Minimum discovered pages before page-level scores are considered meaningful. */
+export const MIN_PAGES_FOR_SCORING = 5;
+
 /** Base URL for the Agent-Friendly Documentation Spec. */
 export const SPEC_BASE_URL = 'https://agentdocsspec.com/spec/';
diff --git a/src/scoring/diagnostics.ts b/src/scoring/diagnostics.ts
index 77f0ddc..6dfeb97 100644
--- a/src/scoring/diagnostics.ts
+++ b/src/scoring/diagnostics.ts
@@ -1,5 +1,6 @@
 import type { CheckResult, ReportResult } from '../types.js';
 import type { Diagnostic, DiagnosticSeverity } from './types.js';
+import { MIN_PAGES_FOR_SCORING } from '../constants.js';
 
 interface DiagnosticDefinition {
   id: string;
@@ -262,13 +263,22 @@ const DIAGNOSTIC_DEFINITIONS: DiagnosticDefinition[] = [
     triggers: (_results, _triggered, report) => {
       const isDiscoveryBased =
         report.samplingStrategy === 'random' || report.samplingStrategy === 'deterministic';
-      return isDiscoveryBased && report.testedPages === 1;
+      return (
+        isDiscoveryBased &&
+        report.testedPages !== undefined &&
+        report.testedPages < MIN_PAGES_FOR_SCORING
+      );
+    },
+    message: (_results, _triggered, report) => {
+      const n = report.testedPages ?? 0;
+      const pageWord = n === 1 ? 'page was' : 'pages were';
+      return (
+        `Only ${n} ${pageWord} discovered and tested (minimum ${MIN_PAGES_FOR_SCORING} ` +
+        'needed for reliable scoring). Page-level category scores (page size, ' +
+        'content structure, URL stability, etc.) may not represent the site. ' +
+        'These categories are marked as N/A in the score.'
+      );
     },
-    message: () =>
-      'Only one page was discovered and tested. Page-level category scores ' +
-      '(page size, content structure, URL stability, etc.) are based on a ' +
-      'single page and may not represent the site. These categories are ' +
-      'marked as N/A in the score.',
     resolution:
       'If your site has an llms.txt, ensure it contains working links so ' +
       'the tool can discover more pages. If testing a preview deployment, ' +
diff --git a/src/scoring/score.ts b/src/scoring/score.ts
index 29198d8..cae19fc 100644
--- a/src/scoring/score.ts
+++ b/src/scoring/score.ts
@@ -1,5 +1,5 @@
 import type { CheckResult, ReportResult } from '../types.js';
-import { CATEGORIES } from '../constants.js';
+import { CATEGORIES, MIN_PAGES_FOR_SCORING } from '../constants.js';
 import type { CategoryScore, CheckScore, Grade, ScoreCap, ScoreResult } from './types.js';
 import { getCheckWeight } from './weights.js';
 import { getCheckProportion } from './proportions.js';
@@ -42,7 +42,10 @@ export function computeScore(report: ReportResult): ScoreResult {
   // Determine if page-level scores lack meaningful data
   const isDiscoveryBased =
     report.samplingStrategy === 'random' || report.samplingStrategy === 'deterministic';
-  const insufficientData = isDiscoveryBased && report.testedPages === 1;
+  const insufficientData =
+    isDiscoveryBased &&
+    report.testedPages !== undefined &&
+    report.testedPages < MIN_PAGES_FOR_SCORING;
 
   // Compute per-check scores
   const checkScores: Record<string, CheckScore> = {};
diff --git a/test/unit/cli/scorecard-formatter.test.ts b/test/unit/cli/scorecard-formatter.test.ts
index ba1f199..e4b6235 100644
--- a/test/unit/cli/scorecard-formatter.test.ts
+++ b/test/unit/cli/scorecard-formatter.test.ts
@@ -465,6 +465,6 @@ describe('formatScorecard', () => {
     expect(output).toContain('(N/A)');
     // Should fire the single-page-sample diagnostic
     expect(output).toContain('Interaction Diagnostics');
-    expect(output).toContain('Only one page was discovered');
+    expect(output).toContain('Only 1 page was discovered');
   });
 });
diff --git a/test/unit/scoring/diagnostics.test.ts b/test/unit/scoring/diagnostics.test.ts
index 5791267..4a6a7d7 100644
--- a/test/unit/scoring/diagnostics.test.ts
+++ b/test/unit/scoring/diagnostics.test.ts
@@ -356,12 +356,21 @@ describe('diagnostics', () => {
       expect(diags.find((d) => d.id === 'single-page-sample')).toBeUndefined();
     });
 
-    it('does not trigger when testedPages > 1', () => {
+    it('does not trigger when testedPages >= threshold', () => {
       const report = { ...defaultReport(), testedPages: 5, samplingStrategy: 'random' as const };
       const diags = evaluateDiagnostics(resultsMap(), report);
       expect(diags.find((d) => d.id === 'single-page-sample')).toBeUndefined();
     });
 
+    it('triggers when testedPages is below threshold but above 1', () => {
+      const report = { ...defaultReport(), testedPages: 3, samplingStrategy: 'random' as const };
+      const diags = evaluateDiagnostics(resultsMap(), report);
+      const diag = diags.find((d) => d.id === 'single-page-sample');
+      expect(diag).toBeDefined();
+      expect(diag!.message).toContain('3 pages were');
+      expect(diag!.message).toContain('minimum 5');
+    });
+
     it('does not trigger when testedPages is undefined', () => {
       const report = { ...defaultReport(), testedPages: undefined };
       const diags = evaluateDiagnostics(resultsMap(), report);
diff --git a/test/unit/scoring/score.test.ts b/test/unit/scoring/score.test.ts
index 243252c..dc706b2 100644
--- a/test/unit/scoring/score.test.ts
+++ b/test/unit/scoring/score.test.ts
@@ -487,7 +487,7 @@ describe('computeScore', () => {
     it('does not mark checks as notApplicable when testedPages > 1', () => {
       const results: CheckResult[] = [makeResult('page-size-html', 'page-size', 'pass')];
       const score = computeScore(
-        makeReport(results, { testedPages: 5, samplingStrategy: 'random' }),
+        makeReport(results, { testedPages: 10, samplingStrategy: 'random' }),
       );
       expect(score.checkScores['page-size-html'].scoreDisplayMode).toBe('numeric');
     });
@@ -536,7 +536,7 @@ describe('computeScore', () => {
       );
       // Without N/A: both count, page-size-html fails -> less than 100
       const scoreNormal = computeScore(
-        makeReport(results, { testedPages: 5, samplingStrategy: 'random' }),
+        makeReport(results, { testedPages: 10, samplingStrategy: 'random' }),
       );
       expect(scoreNA.overall).toBe(100);
       expect(scoreNormal.overall).toBeLessThan(100);

From de0fae27e78898a994293be7b78a8f7f9cc94218 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sat, 25 Apr 2026 18:13:24 -0400
Subject: [PATCH 07/13] Handle intentional audience segmentation in
 'markdown-content-parity'

---
 docs/checks/observability.md                  |  65 ++++-
 docs/reference/cli.md                         |  23 +-
 docs/reference/config-file.md                 |  13 +-
 .../observability/markdown-content-parity.ts  |  98 +++++--
 src/cli/commands/check.ts                     |  42 ++-
 src/constants.ts                              |   6 +
 src/helpers/config.ts                         |  37 +++
 src/scoring/resolutions.ts                    |  12 +-
 src/types.ts                                  |   6 +
 .../checks/markdown-content-parity.test.ts    | 275 +++++++++++++++++-
 test/unit/helpers/config.test.ts              |  84 ++++++
 11 files changed, 620 insertions(+), 41 deletions(-)

diff --git a/docs/checks/observability.md b/docs/checks/observability.md
index 7c0a240..5b44a67 100644
--- a/docs/checks/observability.md
+++ b/docs/checks/observability.md
@@ -36,8 +36,8 @@ The check supports three use cases through configurable thresholds and exclusion
 
 **CLI flags:**
 
-- `--coverage-pass-threshold <n>` — Pass threshold (0-100, default 95)
-- `--coverage-warn-threshold <n>` — Warn threshold (0-100, default 80)
+- `--coverage-pass-threshold <n>` — Minimum coverage % to pass (0-100, default 95; higher = stricter)
+- `--coverage-warn-threshold <n>` — Minimum coverage % to avoid failure (0-100, default 80; higher = stricter)
 - `--coverage-exclusions <patterns>` — Comma-separated glob patterns to exclude from the sitemap before calculating coverage (e.g. `"/docs/reference/**,/docs/changelog/**"`)
 
 These can also be set in `agent-docs.config.yml` under `options`:
@@ -108,21 +108,66 @@ Whether markdown and HTML versions of pages contain the same content.
 
 When markdown is generated separately from HTML (not served directly from source), the two can drift. A site updates its HTML but forgets to regenerate the markdown, leaving agents with outdated instructions or code examples. Or a build pipeline that generates markdown misses some of the content. This is particularly insidious because agents receiving the markdown version have no signal that content is missing or outdated, and humans typically don't look at both page formats to spot discrepancies.
 
+However, content divergence is sometimes intentional. Some sites serve different content to different audiences: agent-optimized markdown alongside human-optimized HTML. In those cases, divergence is a feature, not a bug. The check supports this through audience-segmentation markers and configurable thresholds.
+
 ### Results
 
-Based on the percentage of HTML content segments missing from the markdown version, after normalization:
+Based on the percentage of HTML content segments missing from the markdown version, after normalization. Thresholds are configurable.
+
+| Result | Condition                                                     |
+| ------ | ------------------------------------------------------------- |
+| Pass   | Under pass threshold (default 5%) of content segments missing |
+| Warn   | Between pass and warn thresholds (default 5-20% missing)      |
+| Fail   | At or above warn threshold (default 20% or more missing)      |
+
+### Audience segmentation
+
+Some documentation platforms let site owners serve different content to different audiences. For example, a page might show UI-oriented instructions ("Click the gear icon...") in HTML but API-oriented instructions ("Call `POST /v1/settings`...") in markdown. The check accounts for this in two ways:
+
+**`data-markdown-ignore` attribute.** Add this attribute to HTML elements that contain human-only content (content intentionally excluded from markdown). The check strips these elements before comparing, so they don't count as "missing."
+
+```html
+<div data-markdown-ignore>
+  <p>Click the gear icon in the top-right corner to open settings.</p>
+</div>
+```
+
+This is the recommended convention for platforms that render HTML server-side. If your documentation platform controls the HTML output, adding `data-markdown-ignore` to human-only wrapper elements lets the parity check handle segmentation automatically with no user configuration.
+
+**Configurable thresholds.** For platforms that process segmentation tags server-side (like Fern and Mintlify, where the tags never appear in the rendered HTML), adjust thresholds to match your expected divergence level.
+
+### Configuring parity
+
+The check supports three use cases, matching the same mirrored-to-curated spectrum as `llms-txt-coverage`:
+
+- **Mirrored** (default): Markdown should match HTML. Default thresholds (5/20) apply.
+- **Segmented**: The site uses `data-markdown-ignore` to mark human-only HTML content. The check strips tagged content before comparing; remaining shared content is held to default thresholds.
+- **Curated**: The site intentionally serves different content with no tag-level signal. Set thresholds to 0 (`--parity-pass-threshold 0 --parity-warn-threshold 0`) to make the check informational. It still reports the missing percentage, but does not warn or fail.
+
+**CLI flags:**
+
+- `--parity-pass-threshold <n>` — Maximum missing % to pass (0-100, default 5; lower = stricter). Set to 0 to disable warnings.
+- `--parity-warn-threshold <n>` — Maximum missing % to avoid failure (0-100, default 20; lower = stricter). Set to 0 to disable failures.
+- `--parity-exclusions <selectors>` — Comma-separated CSS selectors to strip from HTML before comparison, for platform-specific conventions beyond `data-markdown-ignore` (e.g. `".human-only,[data-audience='humans']"`)
+
+These can also be set in `agent-docs.config.yml` under `options`:
+
+```yaml
+options:
+  parityPassThreshold: 10
+  parityWarnThreshold: 30
+  parityExclusions:
+    - .human-only-content
+    - '[data-audience="humans"]' # quote selectors starting with [ (YAML treats unquoted [] as arrays)
+```
 
-| Result | Condition                                                                                     |
-| ------ | --------------------------------------------------------------------------------------------- |
-| Pass   | Under 5% of content segments missing                                                          |
-| Warn   | 5-20% missing (minor differences like formatting or navigation elements)                      |
-| Fail   | 20% or more missing (substantive differences like missing sections or outdated code examples) |
+Note: `data-markdown-ignore` is built in and does not need to be listed in `parityExclusions`. The exclusions option is only for additional platform-specific conventions.
 
 ### How to fix
 
-**If this check warns**, review the differences for formatting variations. Minor parity issues (navigation elements present in one format but not the other) may be acceptable.
+**If this check warns**, review the differences. If they reflect intentional audience segmentation, either add `data-markdown-ignore` to the human-only HTML elements or adjust thresholds. If they reflect formatting variations, minor parity issues (navigation elements present in one format but not the other) may be acceptable.
 
-**If this check fails**, your markdown and HTML versions have substantive content differences. Regenerate markdown from source, or fix the build pipeline to keep both formats in sync. The most reliable approach is serving markdown directly from the same source files used to generate HTML, rather than maintaining two separate outputs.
+**If this check fails**, your markdown and HTML versions have substantive content differences. If unintentional, regenerate markdown from source or fix the build pipeline. The most reliable approach is serving markdown directly from the same source files used to generate HTML. If intentional (audience segmentation), add `data-markdown-ignore` to human-only HTML elements, use `--parity-exclusions` for custom conventions, or set thresholds to 0 for informational mode.
 
 ---
 
diff --git a/docs/reference/cli.md b/docs/reference/cli.md
index 987dbe8..96d8f54 100644
--- a/docs/reference/cli.md
+++ b/docs/reference/cli.md
@@ -202,8 +202,8 @@ The defaults (50K pass, 100K fail) reflect observed agent truncation limits. You
 
 | Flag                               | Default | Description                                                   |
 | ---------------------------------- | ------- | ------------------------------------------------------------- |
-| `--coverage-pass-threshold <n>`    | `95`    | `llms-txt-coverage` pass threshold (percentage, 0-100)        |
-| `--coverage-warn-threshold <n>`    | `80`    | `llms-txt-coverage` warn threshold (percentage, 0-100)        |
+| `--coverage-pass-threshold <n>`    | `95`    | Minimum coverage % to pass (higher = stricter)                |
+| `--coverage-warn-threshold <n>`    | `80`    | Minimum coverage % to avoid failure (higher = stricter)       |
 | `--coverage-exclusions <patterns>` |         | Comma-separated glob patterns to exclude from the denominator |
 
 These control the `llms-txt-coverage` check, which compares `llms.txt` page URLs against the sitemap. Set both thresholds to `0` to make the check informational: it still reports coverage percentage and missing pages, but doesn't warn or fail.
@@ -214,6 +214,25 @@ Use exclusion patterns with glob syntax (`**` matches across path segments, `*`
 afdocs check https://example.com --coverage-exclusions "/docs/reference/**,/docs/changelog/**"
 ```
 
+### Parity thresholds
+
+| Flag                              | Default | Description                                                               |
+| --------------------------------- | ------- | ------------------------------------------------------------------------- |
+| `--parity-pass-threshold <n>`     | `5`     | Maximum missing % to pass (lower = stricter)                              |
+| `--parity-warn-threshold <n>`     | `20`    | Maximum missing % to avoid failure (lower = stricter)                     |
+| `--parity-exclusions <selectors>` |         | Comma-separated CSS selectors to strip from HTML before parity comparison |
+
+These control the `markdown-content-parity` check, which compares HTML content against markdown to detect drift. Set both thresholds to `0` to make the check informational: it still reports the missing percentage, but doesn't warn or fail. This is useful for sites that intentionally serve different content to different audiences.
+
+The check also has built-in support for the `data-markdown-ignore` HTML attribute. Elements with this attribute are stripped from the HTML before comparison, so human-only content doesn't count as "missing" from markdown.
+
+Use `--parity-exclusions` for platform-specific conventions beyond `data-markdown-ignore`:
+
+```bash
+# Strip elements with a custom class or data attribute
+afdocs check https://example.com --parity-exclusions ".human-only-content,[data-audience='humans']"
+```
+
 ## Exit codes
 
 | Code | Meaning                     |
diff --git a/docs/reference/config-file.md b/docs/reference/config-file.md
index f31f28e..19686c6 100644
--- a/docs/reference/config-file.md
+++ b/docs/reference/config-file.md
@@ -43,6 +43,12 @@ options:
   #   - /docs/reference/**
   #   - /docs/changelog/**
   #   - "**/release-notes/**"  # quote patterns starting with *
+  # Parity check: thresholds and exclusions
+  # parityPassThreshold: 5
+  # parityWarnThreshold: 20
+  # parityExclusions:
+  #   - .human-only-content
+  #   - '[data-audience="humans"]'  # quote selectors starting with [ (YAML treats unquoted [] as arrays)
 
 # Optional: test specific pages instead of discovering via llms.txt/sitemap
 # pages:
@@ -91,9 +97,12 @@ Override default runner options. All fields are optional:
 | `llmsTxtUrl`            |             | Explicit llms.txt URL to use as canonical (overrides the discovery heuristic; see CLI docs)              |
 | `thresholds.pass`       | `50000`     | Page size pass threshold in characters                                                                   |
 | `thresholds.fail`       | `100000`    | Page size fail threshold in characters                                                                   |
-| `coveragePassThreshold` | `95`        | `llms-txt-coverage` pass threshold (percentage, 0-100)                                                   |
-| `coverageWarnThreshold` | `80`        | `llms-txt-coverage` warn threshold (percentage, 0-100)                                                   |
+| `coveragePassThreshold` | `95`        | `llms-txt-coverage` pass threshold: minimum coverage % to pass (higher = stricter)                       |
+| `coverageWarnThreshold` | `80`        | `llms-txt-coverage` warn threshold: minimum coverage % to avoid failure (higher = stricter)              |
 | `coverageExclusions`    |             | Glob patterns to exclude from the sitemap before calculating coverage (quote patterns starting with `*`) |
+| `parityPassThreshold`   | `5`         | `markdown-content-parity` pass threshold: maximum missing % to pass (lower = stricter)                   |
+| `parityWarnThreshold`   | `20`        | `markdown-content-parity` warn threshold: maximum missing % to avoid failure (lower = stricter)          |
+| `parityExclusions`      |             | CSS selectors to strip from HTML before parity comparison                                                |
 
 ### `pages` (optional)
 
diff --git a/src/checks/observability/markdown-content-parity.ts b/src/checks/observability/markdown-content-parity.ts
index 13c7157..d97aaea 100644
--- a/src/checks/observability/markdown-content-parity.ts
+++ b/src/checks/observability/markdown-content-parity.ts
@@ -2,12 +2,9 @@ import { parse } from 'node-html-parser';
 import { registerCheck } from '../registry.js';
 import { fetchPage } from '../../helpers/fetch-page.js';
 import { toHtmlUrl } from '../../helpers/to-md-urls.js';
+import { DEFAULT_PARITY_PASS_THRESHOLD, DEFAULT_PARITY_WARN_THRESHOLD } from '../../constants.js';
 import type { CheckContext, CheckResult, CheckStatus } from '../../types.js';
 
-/** Thresholds for the percentage of HTML segments not found in markdown. */
-const WARN_THRESHOLD = 5;
-const FAIL_THRESHOLD = 20;
-
 /** Minimum character length for a text segment to be considered meaningful. */
 const MIN_SEGMENT_LENGTH = 20;
 
@@ -268,7 +265,12 @@ const CONTENT_SELECTORS = [
   '.prose',
 ];
 
-function extractHtmlText(html: string): string {
+interface HtmlExtractionResult {
+  text: string;
+  segmentationStripped: number;
+}
+
+function extractHtmlText(html: string, parityExclusions?: string[]): HtmlExtractionResult {
   const root = parse(html);
 
   // Prefer the tightest content container available.
@@ -298,7 +300,32 @@ function extractHtmlText(html: string): string {
   }
 
   if (!content) content = root.querySelector('body');
-  if (!content) return root.text;
+  if (!content) return { text: root.text, segmentationStripped: 0 };
+
+  // Strip audience-segmentation elements before comparison.
+  // data-markdown-ignore marks content intended only for human readers;
+  // it is expected to be absent from the markdown version.
+  let segmentationStripped = 0;
+  for (const el of content.querySelectorAll('[data-markdown-ignore]')) {
+    el.remove();
+    segmentationStripped++;
+  }
+
+  // Strip user-provided CSS selectors (additional platform conventions)
+  if (parityExclusions?.length) {
+    for (const selector of parityExclusions) {
+      try {
+        for (const el of content.querySelectorAll(selector)) {
+          el.remove();
+        }
+      } catch {
+        throw new Error(
+          `Invalid CSS selector in parityExclusions: "${selector}". ` +
+            'If the selector contains [ or ], wrap it in quotes in your YAML config.',
+        );
+      }
+    }
+  }
 
   // Remove non-content elements by tag
   for (const tag of STRIP_TAGS) {
@@ -345,24 +372,18 @@ function extractHtmlText(html: string): string {
   // Expressive Code / Shiki use <div class="ec-line"> inside <pre>),
   // then strip HTML tags while preserving angle-bracket placeholders
   // like <YOUR_API_KEY> or <clusterName> (decoded from &lt;...&gt; entities).
-  return content.text
+  const text = content.text
     .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
     .replace(/<!--[\s\S]*?-->/g, '')
     .replace(/<div[\s>]/gi, '\n<div ')
     .replace(/<\/[^>\s]+>/g, '')
     .replace(/<([a-zA-Z][a-zA-Z0-9-]*)([^>]*)>/g, (_match, tag, rest) => {
       const lower = tag.toLowerCase();
-      // Tags already removed at the DOM level can't appear as real elements
-      // in .text output — they must be entity-decoded text (e.g., prose
-      // discussing <nav> elements). Keep the tag name as text content.
       if (DOM_STRIPPED_TAGS.has(lower)) return tag;
-      // Other known tags (span, div, code, etc.) appear in <pre> block
-      // text output from syntax highlighting — strip them entirely.
       if (HTML_TAG_NAMES.has(lower)) return '';
-      // Unknown "tags" are angle-bracket placeholders like <YOUR_API_KEY>
-      // decoded from entities — keep the full content.
       return tag + rest;
     });
+  return { text, segmentationStripped };
 }
 
 /**
@@ -512,6 +533,8 @@ function toSegments(text: string): string[] {
 function computeParity(
   htmlText: string,
   markdownText: string,
+  warnThreshold: number,
+  failThreshold: number,
 ): Omit<PageParityResult, 'url' | 'markdownSource' | 'error'> {
   // Deduplicate segments so repeated chrome (breadcrumbs, nav titles) or
   // repeated content is only counted once when checking for presence.
@@ -565,13 +588,18 @@ function computeParity(
   const missingPercent =
     htmlSegments.length > 0 ? Math.round((missingCount / htmlSegments.length) * 100) : 0;
 
+  // A threshold of 0 disables that level (informational mode per spec):
+  // the `> 0` guard keeps a zero threshold from ever firing. With both
+  // thresholds at 0 the check always passes.
+  const shouldFail = failThreshold > 0 && missingPercent >= failThreshold;
+  const shouldWarn = warnThreshold > 0 && missingPercent >= warnThreshold;
   let status: CheckStatus;
-  if (missingPercent < WARN_THRESHOLD) {
-    status = 'pass';
-  } else if (missingPercent < FAIL_THRESHOLD) {
+  if (shouldFail) {
+    status = 'fail';
+  } else if (shouldWarn) {
     status = 'warn';
   } else {
-    status = 'fail';
+    status = 'pass';
   }
 
   return {
@@ -619,8 +647,25 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
     };
   }
 
+  const warnThreshold = ctx.options.parityPassThreshold ?? DEFAULT_PARITY_PASS_THRESHOLD;
+  const failThreshold = ctx.options.parityWarnThreshold ?? DEFAULT_PARITY_WARN_THRESHOLD;
+  const parityExclusions = ctx.options.parityExclusions;
+
+  if (warnThreshold > failThreshold && failThreshold > 0) {
+    return {
+      id,
+      category,
+      status: 'error',
+      message:
+        `parityPassThreshold (${warnThreshold}) is greater than ` +
+        `parityWarnThreshold (${failThreshold}). The pass threshold must be ` +
+        'less than or equal to the warn threshold.',
+    };
+  }
+
   const results: PageParityResult[] = [];
   const concurrency = ctx.options.maxConcurrency;
+  let totalSegmentationStripped = 0;
 
   for (let i = 0; i < pagesToCompare.length; i += concurrency) {
     const batch = pagesToCompare.slice(i, i + concurrency);
@@ -658,8 +703,12 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
             };
           }
 
-          const htmlText = extractHtmlText(page.body);
-          const parity = computeParity(htmlText, markdownContent);
+          const { text: htmlText, segmentationStripped } = extractHtmlText(
+            page.body,
+            parityExclusions,
+          );
+          totalSegmentationStripped += segmentationStripped;
+          const parity = computeParity(htmlText, markdownContent, warnThreshold, failThreshold);
 
           return { url, markdownSource, ...parity };
         } catch (err) {
@@ -727,6 +776,15 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
       failBucket,
       fetchErrors,
       avgMissingPercent,
+      ...(totalSegmentationStripped > 0 && {
+        segmentationElementsStripped: totalSegmentationStripped,
+      }),
+      ...(ctx.options.parityPassThreshold != null && {
+        parityPassThreshold: warnThreshold,
+      }),
+      ...(ctx.options.parityWarnThreshold != null && {
+        parityWarnThreshold: failThreshold,
+      }),
       pageResults: results,
     },
   };
diff --git a/src/cli/commands/check.ts b/src/cli/commands/check.ts
index f3f6992..c43d832 100644
--- a/src/cli/commands/check.ts
+++ b/src/cli/commands/check.ts
@@ -38,12 +38,30 @@ export function registerCheckCommand(program: Command): void {
     .option('-v, --verbose', 'Show per-page details for checks with issues')
     .option('--fixes', 'Show fix suggestions for warn/fail checks')
     .option('--score', 'Include scoring data in JSON output')
-    .option('--coverage-pass-threshold <n>', 'llms-txt-coverage pass threshold (0-100, default 95)')
-    .option('--coverage-warn-threshold <n>', 'llms-txt-coverage warn threshold (0-100, default 80)')
+    .option(
+      '--coverage-pass-threshold <n>',
+      'Minimum coverage % to pass (0-100, default 95; higher = stricter)',
+    )
+    .option(
+      '--coverage-warn-threshold <n>',
+      'Minimum coverage % to avoid failure (0-100, default 80; higher = stricter)',
+    )
     .option(
       '--coverage-exclusions <patterns>',
       'Comma-separated glob patterns to exclude from coverage denominator',
     )
+    .option(
+      '--parity-pass-threshold <n>',
+      'Maximum missing % to pass (0-100, default 5; lower = stricter)',
+    )
+    .option(
+      '--parity-warn-threshold <n>',
+      'Maximum missing % to avoid failure (0-100, default 20; lower = stricter)',
+    )
+    .option(
+      '--parity-exclusions <selectors>',
+      'Comma-separated CSS selectors to strip from HTML before parity comparison',
+    )
     .option(
       '--canonical-origin <url>',
       'The production domain your content links to (for preview/staging testing)',
@@ -244,6 +262,23 @@ export function registerCheckCommand(program: Command): void {
               .filter(Boolean)
           : (config?.options?.coverageExclusions ?? undefined);
 
+      const parityPassThreshold =
+        opts.parityPassThreshold != null
+          ? parseInt(String(opts.parityPassThreshold), 10)
+          : (config?.options?.parityPassThreshold ?? undefined);
+      const parityWarnThreshold =
+        opts.parityWarnThreshold != null
+          ? parseInt(String(opts.parityWarnThreshold), 10)
+          : (config?.options?.parityWarnThreshold ?? undefined);
+
+      const parityExclusions =
+        opts.parityExclusions != null
+          ? (opts.parityExclusions as string)
+              .split(',')
+              .map((s) => s.trim())
+              .filter(Boolean)
+          : (config?.options?.parityExclusions ?? undefined);
+
       const report = await runChecks(url, {
         checkIds,
         skipCheckIds,
@@ -263,6 +298,9 @@ export function registerCheckCommand(program: Command): void {
         ...(coveragePassThreshold != null && { coveragePassThreshold }),
         ...(coverageWarnThreshold != null && { coverageWarnThreshold }),
         ...(coverageExclusions && { coverageExclusions }),
+        ...(parityPassThreshold != null && { parityPassThreshold }),
+        ...(parityWarnThreshold != null && { parityWarnThreshold }),
+        ...(parityExclusions && { parityExclusions }),
       });
 
       let output: string;
diff --git a/src/constants.ts b/src/constants.ts
index 1108489..af064b6 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -40,6 +40,12 @@ export const DEFAULT_COVERAGE_PASS_THRESHOLD = 95;
 /** Default llms-txt-coverage warn threshold (percentage). */
 export const DEFAULT_COVERAGE_WARN_THRESHOLD = 80;
 
+/** Default markdown-content-parity pass threshold (percentage of missing segments). */
+export const DEFAULT_PARITY_PASS_THRESHOLD = 5;
+
+/** Default markdown-content-parity warn threshold (percentage of missing segments). */
+export const DEFAULT_PARITY_WARN_THRESHOLD = 20;
+
 /** Minimum discovered pages before page-level scores are considered meaningful. */
 export const MIN_PAGES_FOR_SCORING = 5;
 
diff --git a/src/helpers/config.ts b/src/helpers/config.ts
index 31afbf2..1e12ce4 100644
--- a/src/helpers/config.ts
+++ b/src/helpers/config.ts
@@ -39,6 +39,34 @@ export function validatePages(pages: unknown[], source: string): void {
   }
 }
 
+/**
+ * Validate that a config field is a flat array of strings.
+ * Catches the common YAML mistake of unquoted values containing special
+ * characters (e.g., `[data-foo]` parsed as a nested array instead of a string).
+ */
+function validateStringArray(value: unknown, field: string, source: string): void {
+  if (!Array.isArray(value)) {
+    throw new Error(`${source}: "${field}" must be an array of strings`);
+  }
+  for (let i = 0; i < value.length; i++) {
+    if (typeof value[i] !== 'string') {
+      throw new Error(
+        `${source}: ${field}[${i}] must be a string, got ${typeof value[i]}. ` +
+          'If the value contains [ or *, wrap it in quotes.',
+      );
+    }
+  }
+}
+
+function validateOptions(options: Record<string, unknown>, source: string): void {
+  if (options.coverageExclusions != null) {
+    validateStringArray(options.coverageExclusions, 'options.coverageExclusions', source);
+  }
+  if (options.parityExclusions != null) {
+    validateStringArray(options.parityExclusions, 'options.parityExclusions', source);
+  }
+}
+
 /**
  * Search for an agent-docs config file starting from `dir` and walking up
  * to the filesystem root (like eslint, prettier, etc.).
@@ -60,6 +88,9 @@ export async function loadConfig(dir?: string): Promise<AgentDocsConfig> {
           assertPagesArray(parsed.pages, filepath);
           validatePages(parsed.pages, filepath);
         }
+        if (parsed.options) {
+          validateOptions(parsed.options as Record<string, unknown>, filepath);
+        }
         return parsed;
       } catch (err) {
         if ((err as NodeJS.ErrnoException).code === 'ENOENT') continue;
@@ -96,6 +127,9 @@ export async function findConfig(
       assertPagesArray(parsed.pages, filepath);
       validatePages(parsed.pages, filepath);
     }
+    if (parsed.options) {
+      validateOptions(parsed.options as Record<string, unknown>, filepath);
+    }
     return parsed;
   }
 
@@ -110,6 +144,9 @@ export async function findConfig(
           assertPagesArray(parsed.pages, filepath);
           validatePages(parsed.pages, filepath);
         }
+        if (parsed.options) {
+          validateOptions(parsed.options as Record<string, unknown>, filepath);
+        }
         return parsed;
       } catch (err) {
         if ((err as NodeJS.ErrnoException).code === 'ENOENT') continue;
diff --git a/src/scoring/resolutions.ts b/src/scoring/resolutions.ts
index 88492ef..17b014b 100644
--- a/src/scoring/resolutions.ts
+++ b/src/scoring/resolutions.ts
@@ -314,7 +314,9 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       const warnCount = (d.warnBucket as number) ?? 0;
       return (
         `${warnCount} pages have minor content differences between their ` +
-        'markdown and HTML versions. Review for formatting variations.'
+        'markdown and HTML versions. If this is intentional audience ' +
+        'segmentation, adjust --parity-pass-threshold and ' +
+        '--parity-warn-threshold (set both to 0 for informational mode).'
       );
     },
     fail: (d) => {
@@ -323,9 +325,11 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       return (
         `${failCount} pages have substantive content differences between ` +
         `markdown and HTML (avg ${Math.round(avgMissing)}% missing). ` +
-        'Agents receiving the markdown version are getting outdated or ' +
-        'incomplete content. Regenerate markdown from source or fix the ' +
-        'build pipeline.'
+        'If unintentional, agents are getting outdated content; ' +
+        'regenerate markdown from source or fix the build pipeline. ' +
+        'If intentional (audience segmentation), add ' +
+        'data-markdown-ignore to human-only HTML elements, or adjust ' +
+        'thresholds with --parity-pass-threshold/--parity-warn-threshold.'
       );
     },
   },
diff --git a/src/types.ts b/src/types.ts
index 9805460..2ed67e0 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -90,6 +90,12 @@ export interface CheckOptions {
   coverageWarnThreshold?: number;
   /** Glob patterns to exclude from the sitemap before calculating coverage. */
   coverageExclusions?: string[];
+  /** Pass threshold for markdown-content-parity: max missing % (0–100; 0 disables warn). Default 5. */
+  parityPassThreshold?: number;
+  /** Warn threshold for markdown-content-parity: max missing % (0–100; 0 disables fail). Default 20. */
+  parityWarnThreshold?: number;
+  /** Extra CSS selectors to strip from HTML before parity comparison (e.g. '.human-only'). */
+  parityExclusions?: string[];
   /**
    * Explicit URL to use as the canonical llms.txt for downstream sampling and
    * analysis. When set, the standard candidate-discovery heuristic is bypassed
diff --git a/test/unit/checks/markdown-content-parity.test.ts b/test/unit/checks/markdown-content-parity.test.ts
index cac7953..c2f198b 100644
--- a/test/unit/checks/markdown-content-parity.test.ts
+++ b/test/unit/checks/markdown-content-parity.test.ts
@@ -18,8 +18,9 @@ describe('markdown-content-parity', () => {
   function makeCtx(
     pages: Array<{ url: string; markdown: string; htmlBody: string }>,
     host: string,
+    options?: Record<string, unknown>,
   ) {
-    const ctx = createContext(`http://${host}`, { requestDelay: 0 });
+    const ctx = createContext(`http://${host}`, { requestDelay: 0, ...options });
 
     // Simulate upstream markdown-url-support having run
     ctx.previousResults.set('markdown-url-support', {
@@ -1840,4 +1841,276 @@ Use a linter to automatically verify code fence syntax across all your documents
     const pageResults = result.details?.pageResults as Array<{ missingSegments: number }>;
     expect(pageResults[0].missingSegments).toBe(0);
   });
+
+  // --- Audience segmentation tests ---
+
+  it('strips data-markdown-ignore elements from HTML before comparison', async () => {
+    // Content inside data-markdown-ignore is intended only for human readers;
+    // it is expected to be absent from the markdown version.
+    const html = `<html><body><main>
+      <h1>Getting Started</h1>
+      <p>Install the SDK with npm to add it to your project dependencies list.</p>
+      <div data-markdown-ignore>
+        <p>Click the gear icon in the top-right corner to open the settings panel.</p>
+        <p>Navigate to the API section and click Generate New Key to create credentials.</p>
+      </div>
+      <p>Then import the client module and configure it with your API credentials here.</p>
+      <p>Run the following commands to get started with the installation process now.</p>
+      <p>After installation import the client and call the initialize method first please.</p>
+      <p>The client will automatically detect your configuration from environment variables.</p>
+      <p>You can override any configuration option by passing it to the constructor directly.</p>
+      <p>Make sure your API key is set before attempting to make any requests to the server.</p>
+      <p>The library validates all configuration options and throws helpful error messages.</p>
+      <p>Connection pooling is handled automatically for optimal performance and throughput.</p>
+      <p>TLS certificates are verified by default to ensure secure communication channels.</p>
+    </main></body></html>`;
+
+    const markdown = `# Getting Started
+
+Install the SDK with npm to add it to your project dependencies list.
+
+Then import the client module and configure it with your API credentials here.
+
+Run the following commands to get started with the installation process now.
+
+After installation import the client and call the initialize method first please.
+
+The client will automatically detect your configuration from environment variables.
+
+You can override any configuration option by passing it to the constructor directly.
+
+Make sure your API key is set before attempting to make any requests to the server.
+
+The library validates all configuration options and throws helpful error messages.
+
+Connection pooling is handled automatically for optimal performance and throughput.
+
+TLS certificates are verified by default to ensure secure communication channels.`;
+
+    const url = 'http://mcp-segmentation.local/docs/start';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-segmentation.local');
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    const pageResults = result.details?.pageResults as Array<{ missingSegments: number }>;
+    expect(pageResults[0].missingSegments).toBe(0);
+    expect(result.details?.segmentationElementsStripped).toBe(1);
+  });
+
+  it('uses configurable thresholds (0/0 = informational mode)', async () => {
+    // Setting both thresholds to 0 means the check always passes,
+    // making it informational for sites with intentional content divergence.
+    const html = `<html><body>
+      <h1>Installation Guide</h1>
+      <p>You need Node.js 18 or later installed on your system before proceeding.</p>
+      <p>Create a configuration file with your API credentials and region settings.</p>
+      <p>Import and initialize the client using the configuration you just created.</p>
+      <p>Run the health check to verify everything is working correctly in your environment.</p>
+      <p>Common issues include connection timeouts and authentication failures with expired keys.</p>
+      <p>Check your network connectivity if you experience connection timeout errors repeatedly.</p>
+      <p>Verify your API key has not expired if you see authentication failure messages.</p>
+      <p>Make sure the target host is accessible and responding to network requests properly.</p>
+      <p>Review the troubleshooting section for additional debugging information and tips.</p>
+      <p>Contact support if you continue to experience issues after following these steps.</p>
+      <p>The installation process should take approximately five minutes from start to finish.</p>
+    </body></html>`;
+
+    const markdown = `# Changelog
+
+## v2.0.0
+
+Breaking changes in this release that affect all existing integrations.
+
+## v1.5.0
+
+Added new features for managing team resources and permissions.`;
+
+    const url = 'http://mcp-informational.local/docs/install';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-informational.local', {
+      parityPassThreshold: 0,
+      parityWarnThreshold: 0,
+    });
+    const result = await check.run(ctx);
+    // With thresholds at 0, even massive differences pass
+    expect(result.status).toBe('pass');
+    const pageResults = result.details?.pageResults as Array<{ missingPercent: number }>;
+    // The missing percentage is still reported even though the check passes
+    expect(pageResults[0].missingPercent).toBeGreaterThan(0);
+  });
+
+  it('applies custom parityExclusions CSS selectors', async () => {
+    // Site marks human-only content with its own hook (here a CSS class) instead
+    // of data-markdown-ignore. The user supplies the selector via parityExclusions.
+    const html = `<html><body><main>
+      <h1>API Reference</h1>
+      <p>The API supports the following operations for managing resources securely.</p>
+      <section class="human-only-ui-steps">
+        <p>Open the dashboard and navigate to the API Keys section in the sidebar.</p>
+        <p>Click the Create button to generate a new API key for your application.</p>
+      </section>
+      <p>Use bearer tokens for authentication with the API endpoints on every request.</p>
+      <p>Include your API key in the Authorization header of every request you send.</p>
+      <p>Requests are limited to one hundred per minute per API key by default now.</p>
+      <p>Error responses include a JSON body with a code field and a message for details.</p>
+      <p>Use cursor-based pagination with the after parameter in your requests to pages.</p>
+      <p>Each page returns up to fifty items by default unless configured otherwise here.</p>
+      <p>The SDK supports automatic retries with exponential backoff for transient failures.</p>
+      <p>Configure the maximum number of retries using the maxRetries constructor option.</p>
+      <p>All requests are authenticated automatically using the configured API credentials.</p>
+    </main></body></html>`;
+
+    const markdown = `# API Reference
+
+The API supports the following operations for managing resources securely.
+
+Use bearer tokens for authentication with the API endpoints on every request.
+
+Include your API key in the Authorization header of every request you send.
+
+Requests are limited to one hundred per minute per API key by default now.
+
+Error responses include a JSON body with a code field and a message for details.
+
+Use cursor-based pagination with the after parameter in your requests to pages.
+
+Each page returns up to fifty items by default unless configured otherwise here.
+
+The SDK supports automatic retries with exponential backoff for transient failures.
+
+Configure the maximum number of retries using the maxRetries constructor option.
+
+All requests are authenticated automatically using the configured API credentials.`;
+
+    const url = 'http://mcp-custom-excl.local/docs/api';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-custom-excl.local', {
+      parityExclusions: ['.human-only-ui-steps'],
+    });
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    const pageResults = result.details?.pageResults as Array<{ missingSegments: number }>;
+    expect(pageResults[0].missingSegments).toBe(0);
+  });
+
+  it('reports custom thresholds in details when non-default', async () => {
+    const html =
+      '<html><body><h1>Page</h1><p>Content that matches between HTML and markdown versions exactly.</p></body></html>';
+    const markdown = '# Page\n\nContent that matches between HTML and markdown versions exactly.';
+    const url = 'http://mcp-threshold-report.local/docs/page';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-threshold-report.local', {
+      parityPassThreshold: 10,
+      parityWarnThreshold: 30,
+    });
+    const result = await check.run(ctx);
+    expect(result.details?.parityPassThreshold).toBe(10);
+    expect(result.details?.parityWarnThreshold).toBe(30);
+  });
+
+  it('returns error when parityPassThreshold exceeds parityWarnThreshold', async () => {
+    const html =
+      '<html><body><h1>Page</h1><p>Content that matches between HTML and markdown versions exactly.</p></body></html>';
+    const markdown = '# Page\n\nContent that matches between HTML and markdown versions exactly.';
+    const url = 'http://mcp-inverted.local/docs/page';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-inverted.local', {
+      parityPassThreshold: 20,
+      parityWarnThreshold: 5,
+    });
+    const result = await check.run(ctx);
+    expect(result.status).toBe('error');
+    expect(result.message).toContain('greater than');
+  });
+
+  it('reports a clear error for invalid CSS selectors in parityExclusions', async () => {
+    const html = `<html><body><main>
+      <h1>Page Title</h1>
+      <p>Content that is long enough to form a meaningful segment for comparison.</p>
+      <p>Additional content that provides enough segments for a valid comparison run.</p>
+      <p>Third paragraph ensures we are above the minimum segment threshold limit.</p>
+      <p>Fourth paragraph provides additional content for the comparison to process.</p>
+      <p>Fifth paragraph rounds out the content to ensure robust segment extraction.</p>
+      <p>Sixth paragraph adds more unique text to exceed the minimum required amount.</p>
+      <p>Seventh paragraph continues to build the content body for a thorough check.</p>
+      <p>Eighth paragraph ensures this page has plenty of content for the check run.</p>
+      <p>Ninth paragraph keeps the content flowing with additional unique text here.</p>
+      <p>Tenth paragraph wraps up the page with one final block of meaningful text.</p>
+    </main></body></html>`;
+    const markdown = '# Page Title\n\nSome markdown content here.';
+    const url = 'http://mcp-bad-selector.local/docs/page';
+
+    server.use(
+      http.get(
+        url,
+        () =>
+          new HttpResponse(html, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+
+    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-bad-selector.local', {
+      parityExclusions: ['[data-foo='],
+    });
+    const result = await check.run(ctx);
+    const pageResults = result.details?.pageResults as Array<{ error?: string }>;
+    expect(pageResults[0].error).toContain('Invalid CSS selector');
+  });
 });
diff --git a/test/unit/helpers/config.test.ts b/test/unit/helpers/config.test.ts
index c12f2a9..b34dc04 100644
--- a/test/unit/helpers/config.test.ts
+++ b/test/unit/helpers/config.test.ts
@@ -254,4 +254,88 @@ describe('findConfig', () => {
 
     await expect(findConfig(configPath)).rejects.toThrow('"pages" must be an array');
   });
+
+  it('accepts properly quoted parityExclusions', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const configPath = resolve(TMP_DIR, 'parity-ok.yml');
+    await writeFile(
+      configPath,
+      [
+        'url: https://example.com',
+        'options:',
+        '  parityExclusions:',
+        '    - .human-only',
+        '    - \'[data-audience="humans"]\'',
+        '',
+      ].join('\n'),
+    );
+
+    const config = await findConfig(configPath);
+    expect(config?.options?.parityExclusions).toEqual(['.human-only', '[data-audience="humans"]']);
+  });
+
+  it('throws when parityExclusions contains unquoted bracket selector', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const configPath = resolve(TMP_DIR, 'parity-bad.yml');
+    // Unquoted [data-foo] is parsed by YAML as a nested array, not a string
+    await writeFile(
+      configPath,
+      ['url: https://example.com', 'options:', '  parityExclusions:', '    - [data-foo]', ''].join(
+        '\n',
+      ),
+    );
+
+    await expect(findConfig(configPath)).rejects.toThrow('parityExclusions[0] must be a string');
+  });
+
+  it('throws when coverageExclusions contains a non-string entry', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const configPath = resolve(TMP_DIR, 'coverage-bad.yml');
+    await writeFile(
+      configPath,
+      [
+        'url: https://example.com',
+        'options:',
+        '  coverageExclusions:',
+        '    - [nested-array]',
+        '',
+      ].join('\n'),
+    );
+
+    await expect(findConfig(configPath)).rejects.toThrow('coverageExclusions[0] must be a string');
+  });
+
+  it('validates exclusions in auto-discovered config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      [
+        'url: https://example.com',
+        'options:',
+        '  parityExclusions:',
+        '    - [bad-selector]',
+        '',
+      ].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow(
+      'parityExclusions[0] must be a string',
+    );
+  });
+
+  it('validates exclusions in loadConfig', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      [
+        'url: https://example.com',
+        'options:',
+        '  parityExclusions:',
+        '    - [bad-selector]',
+        '',
+      ].join('\n'),
+    );
+
+    await expect(loadConfig(TMP_DIR)).rejects.toThrow('parityExclusions[0] must be a string');
+  });
 });

From 817c487ea7237063a7aad1ae4188a691224f3876 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 26 Apr 2026 12:20:52 -0400
Subject: [PATCH 08/13] Shore up unit and integration tests

---
 .../tabbed-content-serialization.ts           |   3 +-
 src/helpers/detect-tabs.ts                    |   1 -
 .../integration/cross-check-contracts.test.ts | 404 +++++++++
 test/integration/dependency-chains.test.ts    | 348 ++++++++
 test/integration/scoring-pipeline.test.ts     | 629 ++++++++++++++
 test/unit/checks/llms-txt-coverage.test.ts    | 164 ++++
 .../checks/llms-txt-directive-html.test.ts    |  73 ++
 .../unit/checks/llms-txt-directive-md.test.ts |  79 ++
 test/unit/checks/redirect-behavior.test.ts    |  17 +
 .../tabbed-content-serialization.test.ts      | 185 ++++
 test/unit/cli/formatters.test.ts              | 793 +++++++++++++++++-
 test/unit/cli/scorecard-formatter.test.ts     |  91 ++
 test/unit/helpers/detect-tabs.test.ts         |  98 +++
 .../unit/helpers/get-markdown-content.test.ts | 379 +++++++++
 test/unit/helpers/http.test.ts                |  78 ++
 test/unit/scoring/coefficients.test.ts        |  54 ++
 test/unit/scoring/diagnostics.test.ts         | 118 +++
 test/unit/scoring/proportions.test.ts         | 181 ++++
 test/unit/scoring/resolutions.test.ts         | 162 ++++
 19 files changed, 3848 insertions(+), 9 deletions(-)
 create mode 100644 test/integration/cross-check-contracts.test.ts
 create mode 100644 test/integration/dependency-chains.test.ts
 create mode 100644 test/integration/scoring-pipeline.test.ts
 create mode 100644 test/unit/helpers/get-markdown-content.test.ts

diff --git a/src/checks/content-structure/tabbed-content-serialization.ts b/src/checks/content-structure/tabbed-content-serialization.ts
index 3126ef0..da7f98b 100644
--- a/src/checks/content-structure/tabbed-content-serialization.ts
+++ b/src/checks/content-structure/tabbed-content-serialization.ts
@@ -29,8 +29,7 @@ function worstStatus(statuses: CheckStatus[]): CheckStatus {
 }
 
 function formatSize(chars: number): string {
-  if (chars >= 1000) return `${Math.round(chars / 1000)}K`;
-  return String(chars);
+  return chars >= 1000 ? `${Math.round(chars / 1000)}K` : String(chars); // <1K stays exact
 }
 
 /**
diff --git a/src/helpers/detect-tabs.ts b/src/helpers/detect-tabs.ts
index 8af0e55..31610dc 100644
--- a/src/helpers/detect-tabs.ts
+++ b/src/helpers/detect-tabs.ts
@@ -211,7 +211,6 @@ const genericAria: Detector = (root, claimed) => {
     }
 
     const { container, panels } = found;
-    if (tabs.length === 0 && panels.length === 0) continue;
 
     // Use tab count as the authority. Containers may hold panels from
     // multiple tab groups; capping to tabs.length avoids misattributing
diff --git a/test/integration/cross-check-contracts.test.ts b/test/integration/cross-check-contracts.test.ts
new file mode 100644
index 0000000..7ab9170
--- /dev/null
+++ b/test/integration/cross-check-contracts.test.ts
@@ -0,0 +1,404 @@
+/**
+ * Cross-check field contract and previousResults safety tests.
+ *
+ * These tests validate that:
+ * 1. Downstream checks handle missing/empty upstream details gracefully
+ * 2. Every check that reads previousResults works in isolation (no deps ran)
+ * 3. Data shape contracts between checks are maintained
+ */
+import { describe, it, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { createContext } from '../../src/runner.js';
+import { getCheck } from '../../src/checks/registry.js';
+import '../../src/checks/index.js';
+import type { DiscoveredFile } from '../../src/types.js';
+import { mockSitemapNotFound } from '../helpers/mock-sitemap-not-found.js';
+
+const server = setupServer();
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' });
+  return () => server.close();
+});
+
+const prose =
+  'This is comprehensive documentation content explaining the feature. ' +
+  'It covers configuration, usage, and troubleshooting. ';
+
+function makeCtx(
+  host: string,
+  opts?: {
+    llmsTxt?: string;
+    llmsTxtStatus?: 'pass' | 'fail';
+  },
+) {
+  const ctx = createContext(`http://${host}`, { requestDelay: 0 });
+
+  if (opts?.llmsTxt) {
+    const discovered: DiscoveredFile[] = [
+      { url: `http://${host}/llms.txt`, content: opts.llmsTxt, status: 200, redirected: false },
+    ];
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'content-discoverability',
+      status: opts.llmsTxtStatus ?? 'pass',
+      message: 'Found',
+      details: { discoveredFiles: discovered },
+    });
+  }
+
+  mockSitemapNotFound(server, `http://${host}`);
+  return ctx;
+}
+
+describe('previousResults safety: checks handle missing dependencies gracefully', () => {
+  describe('checks with declared dependsOn', () => {
+    it('page-size-markdown skips when no markdown deps ran (standalone mode)', async () => {
+      const host = 'contract-psmk.local';
+      const llms = `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`;
+      const ctx = makeCtx(host, { llmsTxt: llms });
+      // No markdown-url-support or content-negotiation in previousResults
+
+      server.use(
+        http.get(`http://${host}/docs/page.md`, () => new HttpResponse(null, { status: 404 })),
+        http.get(
+          `http://${host}/docs/page/index.md`,
+          () => new HttpResponse(null, { status: 404 }),
+        ),
+        http.get(
+          `http://${host}/docs/page`,
+          () =>
+            new HttpResponse(`<html><body><h1>Page</h1><p>${prose}</p></body></html>`, {
+              status: 200,
+              headers: { 'Content-Type': 'text/html' },
+            }),
+        ),
+      );
+
+      const check = getCheck('page-size-markdown')!;
+      const result = await check.run(ctx);
+      // Should not crash — either skips or runs in standalone mode
+      expect(['pass', 'warn', 'fail', 'skip']).toContain(result.status);
+    });
+  });
+
+  describe('checks with soft dependencies (no dependsOn but reads previousResults)', () => {
+    it('section-header-quality skips cleanly without tabbed-content-serialization', async () => {
+      const host = 'contract-shq.local';
+      const ctx = makeCtx(host);
+      // No tabbed-content-serialization in previousResults
+
+      const check = getCheck('section-header-quality')!;
+      const result = await check.run(ctx);
+      expect(result.status).toBe('skip');
+      expect(result.message).toContain('tabbed-content-serialization');
+    });
+
+    it('auth-alternative-access skips cleanly without auth-gate-detection', async () => {
+      const host = 'contract-aaa.local';
+      const ctx = makeCtx(host);
+      // No auth-gate-detection in previousResults
+
+      const check = getCheck('auth-alternative-access')!;
+      const result = await check.run(ctx);
+      expect(result.status).toBe('skip');
+    });
+
+    it('auth-alternative-access skips when auth-gate-detection passed', async () => {
+      const host = 'contract-aaa-pass.local';
+      const ctx = makeCtx(host);
+      ctx.previousResults.set('auth-gate-detection', {
+        id: 'auth-gate-detection',
+        category: 'authentication',
+        status: 'pass',
+        message: 'All accessible',
+      });
+
+      const check = getCheck('auth-alternative-access')!;
+      const result = await check.run(ctx);
+      expect(result.status).toBe('skip');
+      expect(result.message).toContain('publicly accessible');
+    });
+
+    it('tabbed-content-serialization works without rendering-strategy', async () => {
+      const host = 'contract-tcs.local';
+      const llms = `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`;
+      const ctx = makeCtx(host, { llmsTxt: llms });
+      // No rendering-strategy in previousResults
+
+      server.use(
+        http.get(
+          `http://${host}/docs/page`,
+          () =>
+            new HttpResponse(`<html><body><h1>Page</h1><p>${prose}</p></body></html>`, {
+              status: 200,
+              headers: { 'Content-Type': 'text/html' },
+            }),
+        ),
+        http.head(`http://${host}/docs/page`, () => new HttpResponse(null, { status: 200 })),
+      );
+
+      const check = getCheck('tabbed-content-serialization')!;
+      const result = await check.run(ctx);
+      // Should run and not crash — just won't try SPA fallback
+      expect(['pass', 'warn', 'fail']).toContain(result.status);
+    });
+
+    it('cache-header-hygiene works without llms-txt-exists', async () => {
+      const host = 'contract-chh.local';
+      const ctx = createContext(`http://${host}`, { requestDelay: 0 });
+      // No llms-txt-exists in previousResults, no llms.txt
+
+      server.use(
+        http.get(`http://${host}/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+        http.get(`http://${host}/docs/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+        http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })),
+        http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })),
+        http.get(
+          `http://${host}/`,
+          () =>
+            new HttpResponse(
+              `<html><body><h1>Home</h1><a href="http://${host}/docs/page">Page</a></body></html>`,
+              {
+                status: 200,
+                headers: { 'Content-Type': 'text/html', 'Cache-Control': 'max-age=300' },
+              },
+            ),
+        ),
+        http.get(
+          `http://${host}/docs/page`,
+          () =>
+            new HttpResponse(`<html><body><h1>Page</h1><p>${prose}</p></body></html>`, {
+              status: 200,
+              headers: { 'Content-Type': 'text/html', 'Cache-Control': 'max-age=300' },
+            }),
+        ),
+        http.get(`http://${host}/docs/page.md`, () => new HttpResponse(null, { status: 404 })),
+        http.get(
+          `http://${host}/docs/page/index.md`,
+          () => new HttpResponse(null, { status: 404 }),
+        ),
+      );
+
+      const check = getCheck('cache-header-hygiene')!;
+      const result = await check.run(ctx);
+      // Should run without crashing; won't include llms.txt URL in endpoints
+      expect(['pass', 'warn', 'fail']).toContain(result.status);
+    });
+
+    it('llms-txt-directive-md works without cached markdown (fetches independently)', async () => {
+      const host = 'contract-ldm.local';
+      const llms = `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`;
+      const ctx = makeCtx(host, { llmsTxt: llms });
+      // No markdown-url-support or content-negotiation in previousResults
+      // No pageCache entries
+
+      server.use(
+        http.get(
+          `http://${host}/docs/page.md`,
+          () =>
+            new HttpResponse('> See [llms.txt](/llms.txt) for the docs.\n\n# Page\n\nContent.', {
+              status: 200,
+              headers: { 'Content-Type': 'text/markdown' },
+            }),
+        ),
+        http.get(
+          `http://${host}/docs/page/index.md`,
+          () => new HttpResponse(null, { status: 404 }),
+        ),
+        http.get(
+          `http://${host}/docs/page`,
+          () =>
+            new HttpResponse(`<html><body><p>${prose}</p></body></html>`, {
+              status: 200,
+              headers: { 'Content-Type': 'text/html' },
+            }),
+        ),
+      );
+
+      const check = getCheck('llms-txt-directive-md')!;
+      const result = await check.run(ctx);
+      // Should find directive via .md URL fetch (standalone mode)
+      expect(result.status).toBe('pass');
+      expect(result.details?.foundCount).toBe(1);
+    });
+  });
+});
+
+describe('cross-check field contracts: empty/missing upstream details', () => {
+  it('page-size-markdown handles mdUrlResult with no details', async () => {
+    const host = 'contract-psm-empty.local';
+    const llms = `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`;
+    const ctx = makeCtx(host, { llmsTxt: llms });
+
+    // markdown-url-support exists but with no details at all
+    ctx.previousResults.set('markdown-url-support', {
+      id: 'markdown-url-support',
+      category: 'markdown-availability',
+      status: 'pass',
+      message: 'OK',
+      // No details field
+    });
+    // Populate pageCache so the check has content
+    ctx.pageCache.set(`http://${host}/docs/page`, {
+      url: `http://${host}/docs/page`,
+      markdown: { content: '# Page\n\nContent.', source: 'md-url' },
+    });
+
+    const check = getCheck('page-size-markdown')!;
+    const result = await check.run(ctx);
+    // Should not crash — mdUrlMap will be empty, falling back to page.url
+    expect(['pass', 'warn', 'fail']).toContain(result.status);
+    const pageResults = result.details?.pageResults as Array<{ url: string; mdUrl: string }>;
+    expect(pageResults).toBeDefined();
+    // Without mdUrlResult details, mdUrl falls back to the page URL
+    expect(pageResults[0].mdUrl).toBe(`http://${host}/docs/page`);
+  });
+
+  it('page-size-markdown handles mdUrlResult with empty pageResults', async () => {
+    const host = 'contract-psm-empty2.local';
+    const llms = `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`;
+    const ctx = makeCtx(host, { llmsTxt: llms });
+
+    ctx.previousResults.set('markdown-url-support', {
+      id: 'markdown-url-support',
+      category: 'markdown-availability',
+      status: 'pass',
+      message: 'OK',
+      details: { pageResults: [] },
+    });
+    ctx.pageCache.set(`http://${host}/docs/page`, {
+      url: `http://${host}/docs/page`,
+      markdown: { content: '# Page\n\nContent.', source: 'md-url' },
+    });
+
+    const check = getCheck('page-size-markdown')!;
+    const result = await check.run(ctx);
+    expect(['pass', 'warn', 'fail']).toContain(result.status);
+  });
+
+  it('section-header-quality handles tabbed-content-serialization with empty tabbedPages', async () => {
+    const host = 'contract-shq-empty.local';
+    const ctx = makeCtx(host);
+
+    ctx.previousResults.set('tabbed-content-serialization', {
+      id: 'tabbed-content-serialization',
+      category: 'content-structure',
+      status: 'pass',
+      message: 'No tabs',
+      details: { tabbedPages: [] },
+    });
+
+    const check = getCheck('section-header-quality')!;
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.message).toContain('No tabbed content');
+  });
+
+  it('section-header-quality handles tabbed-content-serialization with no details', async () => {
+    const host = 'contract-shq-nodet.local';
+    const ctx = makeCtx(host);
+
+    ctx.previousResults.set('tabbed-content-serialization', {
+      id: 'tabbed-content-serialization',
+      category: 'content-structure',
+      status: 'pass',
+      message: 'No tabs',
+    });
+
+    const check = getCheck('section-header-quality')!;
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+  });
+
+  it('auth-alternative-access handles auth-gate-detection with empty details', async () => {
+    const host = 'contract-aaa-empty.local';
+    const ctx = makeCtx(host);
+
+    ctx.previousResults.set('auth-gate-detection', {
+      id: 'auth-gate-detection',
+      category: 'authentication',
+      status: 'fail',
+      message: 'Auth required',
+      // Empty details — all numeric fields default to 0 via ?? operator
+      details: {},
+    });
+
+    const check = getCheck('auth-alternative-access')!;
+    const result = await check.run(ctx);
+    // Should not crash — gatedCount/accessibleCount default to 0
+    expect(['pass', 'warn', 'fail']).toContain(result.status);
+  });
+
+  it('auth-alternative-access handles missing llms-txt-exists and markdown results', async () => {
+    const host = 'contract-aaa-nodeps.local';
+    const ctx = makeCtx(host);
+
+    ctx.previousResults.set('auth-gate-detection', {
+      id: 'auth-gate-detection',
+      category: 'authentication',
+      status: 'fail',
+      message: 'Auth required',
+      details: {
+        accessible: 0,
+        authRequired: 5,
+        softAuthGate: 0,
+        authRedirect: 0,
+        testedPages: 5,
+      },
+    });
+    // No llms-txt-exists, no markdown-url-support, no content-negotiation
+
+    const check = getCheck('auth-alternative-access')!;
+    const result = await check.run(ctx);
+    // Should return fail (no alternative paths found)
+    expect(result.status).toBe('fail');
+  });
+
+  it('cache-header-hygiene handles llms-txt-exists with no discoveredFiles', async () => {
+    const host = 'contract-chh-nofiles.local';
+    const ctx = createContext(`http://${host}`, { requestDelay: 0 });
+
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'content-discoverability',
+      status: 'pass',
+      message: 'Found',
+      details: { discoveredFiles: [] },
+    });
+
+    server.use(
+      http.get(`http://${host}/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+      http.get(`http://${host}/docs/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+      http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })),
+      http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })),
+      http.get(
+        `http://${host}/`,
+        () =>
+          new HttpResponse(
+            `<html><body><h1>Home</h1><a href="http://${host}/docs/page">Page</a></body></html>`,
+            {
+              status: 200,
+              headers: { 'Content-Type': 'text/html', 'Cache-Control': 'max-age=300' },
+            },
+          ),
+      ),
+      http.get(
+        `http://${host}/docs/page`,
+        () =>
+          new HttpResponse(`<html><body><p>${prose}</p></body></html>`, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html', 'Cache-Control': 'max-age=300' },
+          }),
+      ),
+      http.get(`http://${host}/docs/page.md`, () => new HttpResponse(null, { status: 404 })),
+      http.get(`http://${host}/docs/page/index.md`, () => new HttpResponse(null, { status: 404 })),
+    );
+
+    const check = getCheck('cache-header-hygiene')!;
+    const result = await check.run(ctx);
+    // Should work fine — just fewer endpoints to test
+    expect(['pass', 'warn', 'fail']).toContain(result.status);
+  });
+});
diff --git a/test/integration/dependency-chains.test.ts b/test/integration/dependency-chains.test.ts
new file mode 100644
index 0000000..d1f90bd
--- /dev/null
+++ b/test/integration/dependency-chains.test.ts
@@ -0,0 +1,348 @@
+/**
+ * Dependency chain integration tests.
+ *
+ * Tests the runner's dependency resolution logic:
+ * - OR-gate dependencies (either dep passing is sufficient)
+ * - --skip-checks interaction with dependency resolution
+ * - Soft dependency chains (previousResults reads without dependsOn)
+ */
+import { describe, it, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { runChecks } from '../../src/runner.js';
+import '../../src/checks/index.js';
+
+const server = setupServer();
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' });
+  return () => server.close();
+});
+
+const prose =
+  'This is comprehensive documentation content explaining the feature. ' +
+  'It covers configuration, usage, and troubleshooting. ';
+
+function setupSite(
+  host: string,
+  opts: {
+    llmsTxt?: string;
+    pages: Array<{
+      path: string;
+      html?: string;
+      md?: string;
+      contentNeg?: string;
+    }>;
+  },
+) {
+  const handlers = [];
+
+  if (opts.llmsTxt) {
+    handlers.push(http.get(`http://${host}/llms.txt`, () => HttpResponse.text(opts.llmsTxt!)));
+  } else {
+    handlers.push(
+      http.get(`http://${host}/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+    );
+  }
+  handlers.push(
+    http.get(`http://${host}/docs/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+    http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })),
+    http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })),
+  );
+
+  for (const page of opts.pages) {
+    handlers.push(
+      http.get(`http://${host}${page.path}`, ({ request }) => {
+        const accept = request.headers.get('accept') ?? '';
+        if (accept.includes('text/markdown') && page.contentNeg) {
+          return new HttpResponse(page.contentNeg, {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          });
+        }
+        return new HttpResponse(
+          page.html ?? `<html><body><h1>Page</h1><p>${prose}</p></body></html>`,
+          { status: 200, headers: { 'Content-Type': 'text/html' } },
+        );
+      }),
+      http.head(`http://${host}${page.path}`, () => new HttpResponse(null, { status: 200 })),
+    );
+
+    if (page.md) {
+      handlers.push(
+        http.get(
+          `http://${host}${page.path}.md`,
+          () =>
+            new HttpResponse(page.md!, {
+              status: 200,
+              headers: { 'Content-Type': 'text/markdown' },
+            }),
+        ),
+        http.get(
+          `http://${host}${page.path}/index.md`,
+          () =>
+            new HttpResponse(page.md!, {
+              status: 200,
+              headers: { 'Content-Type': 'text/markdown' },
+            }),
+        ),
+      );
+    } else {
+      handlers.push(
+        http.get(`http://${host}${page.path}.md`, () => new HttpResponse(null, { status: 404 })),
+        http.get(
+          `http://${host}${page.path}/index.md`,
+          () => new HttpResponse(null, { status: 404 }),
+        ),
+      );
+    }
+  }
+
+  server.use(...handlers);
+}
+
+describe('OR-gate dependencies', () => {
+  it('page-size-markdown runs when markdown-url-support passes but content-negotiation fails', async () => {
+    const host = 'dep-or-md.local';
+    const md = `# Page\n\n${prose}\n`;
+    setupSite(host, {
+      llmsTxt: `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`,
+      pages: [{ path: '/docs/page', md }],
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'markdown-url-support',
+        'content-negotiation',
+        'page-size-markdown',
+      ],
+    });
+
+    expect(report.results.find((r) => r.id === 'markdown-url-support')?.status).toBe('pass');
+    expect(report.results.find((r) => r.id === 'content-negotiation')?.status).toBe('fail');
+    // OR-gate: one passed → page-size-markdown should run, not skip
+    const psm = report.results.find((r) => r.id === 'page-size-markdown')!;
+    expect(psm.status).not.toBe('skip');
+  });
+
+  it('page-size-markdown runs when content-negotiation passes but markdown-url-support fails', async () => {
+    const host = 'dep-or-cn.local';
+    const cnContent = `# Page\n\n${prose}\n`;
+    setupSite(host, {
+      llmsTxt: `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`,
+      pages: [{ path: '/docs/page', contentNeg: cnContent }],
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'markdown-url-support',
+        'content-negotiation',
+        'page-size-markdown',
+      ],
+    });
+
+    expect(report.results.find((r) => r.id === 'markdown-url-support')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'content-negotiation')?.status).toBe('pass');
+    const psm = report.results.find((r) => r.id === 'page-size-markdown')!;
+    expect(psm.status).not.toBe('skip');
+  });
+
+  it('page-size-markdown skips when both OR-gate deps fail', async () => {
+    const host = 'dep-or-both.local';
+    setupSite(host, {
+      llmsTxt: `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`,
+      pages: [{ path: '/docs/page' }],
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'markdown-url-support',
+        'content-negotiation',
+        'page-size-markdown',
+      ],
+    });
+
+    expect(report.results.find((r) => r.id === 'markdown-url-support')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'content-negotiation')?.status).toBe('fail');
+    const psm = report.results.find((r) => r.id === 'page-size-markdown')!;
+    expect(psm.status).toBe('skip');
+    expect(psm.message).toContain('dependency');
+  });
+});
+
+describe('--skip-checks interaction with dependencies', () => {
+  it('skipped check emits skip result but does not block downstream standalone mode', async () => {
+    const host = 'dep-skipcheck.local';
+    setupSite(host, {
+      llmsTxt: `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`,
+      pages: [{ path: '/docs/page', md: `# Page\n\n${prose}\n` }],
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'markdown-url-support',
+        'content-negotiation',
+        'page-size-markdown',
+      ],
+      skipCheckIds: ['markdown-url-support'],
+    });
+
+    // markdown-url-support should be explicitly skipped
+    const mdUrl = report.results.find((r) => r.id === 'markdown-url-support')!;
+    expect(mdUrl.status).toBe('skip');
+    expect(mdUrl.message).toContain('--skip-checks');
+
+    // content-negotiation should still run (not affected by skip). The mock page
+    // has no `contentNeg` body, so the check is expected to fail — the same
+    // outcome the first OR-gate test asserts for an identically configured site.
+    const cn = report.results.find((r) => r.id === 'content-negotiation')!;
+    expect(cn.status).toBe('fail');
+
+    // page-size-markdown depends on OR(markdown-url-support, content-negotiation).
+    // markdown-url-support is skipped (not stored in previousResults, so not "ran and failed").
+    // content-negotiation ran and failed, so the OR-gate has one "ran and failed"
+    // and one "never ran". The runner behavior: anyDepRan = true (CN ran),
+    // checkDependenciesMet: CN failed, md-url never ran → OR-group fails → skip.
+    // Asserted unconditionally: guarding on `cn.status === 'fail'` would let the
+    // test pass without ever checking the downstream status.
+    const psm = report.results.find((r) => r.id === 'page-size-markdown')!;
+    expect(psm.status).toBe('skip');
+  });
+
+  it('skipping a dependency via --skip-checks lets runner invoke downstream, but check self-skips without data', async () => {
+    const host = 'dep-skiponly.local';
+    setupSite(host, {
+      llmsTxt: `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`,
+      pages: [{ path: '/docs/page', md: `# Page\n\n${prose}\n` }],
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: ['llms-txt-exists', 'llms-txt-valid'],
+      skipCheckIds: ['llms-txt-exists'],
+    });
+
+    const exists = report.results.find((r) => r.id === 'llms-txt-exists')!;
+    expect(exists.status).toBe('skip');
+    expect(exists.message).toContain('--skip-checks');
+
+    // The runner does NOT block llms-txt-valid (dep "never ran" from runner's
+    // perspective, since --skip-checks results aren't stored in previousResults).
+    // But llms-txt-valid reads previousResults.get('llms-txt-exists') for data
+    // and self-skips with a different message — it can't validate without files.
+    const valid = report.results.find((r) => r.id === 'llms-txt-valid')!;
+    expect(valid.status).toBe('skip');
+    expect(valid.message).toContain('No llms.txt files');
+    // Crucially, the message should NOT say "dependency" (that's the runner's skip)
+    expect(valid.message).not.toContain('dependency');
+  });
+});
+
+describe('soft dependency chains', () => {
+  it('rendering-strategy → tabbed-content-serialization → section-header-quality chain', async () => {
+    const host = 'dep-soft-chain.local';
+    // Tab panels must contain section headers for section-header-quality to analyze them
+    const tabHtml = `
+      <html><body>
+        <h1>Docs</h1>
+        <div class="sphinx-tabs">
+          <div class="sphinx-tabs-tab">Python</div>
+          <div class="sphinx-tabs-tab">JavaScript</div>
+          <div class="sphinx-tabs-panel"><h2>Installation</h2><p>pip install foo</p><h2>Usage</h2><p>import foo</p></div>
+          <div class="sphinx-tabs-panel"><h2>Installation</h2><p>npm install foo</p><h2>Usage</h2><p>require('foo')</p></div>
+        </div>
+        <p>${prose}</p>
+      </body></html>
+    `;
+    setupSite(host, {
+      llmsTxt: `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`,
+      pages: [{ path: '/docs/page', html: tabHtml }],
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'rendering-strategy',
+        'tabbed-content-serialization',
+        'section-header-quality',
+      ],
+    });
+
+    // rendering-strategy should pass (server-rendered HTML)
+    expect(report.results.find((r) => r.id === 'rendering-strategy')?.status).toBe('pass');
+
+    // tabbed-content-serialization should detect the sphinx tabs
+    const tcs = report.results.find((r) => r.id === 'tabbed-content-serialization')!;
+    expect(tcs.status).toBe('pass');
+    expect(tcs.details?.totalGroupsFound).toBeGreaterThan(0);
+
+    // section-header-quality reads from tabbed-content-serialization's tabbedPages
+    const shq = report.results.find((r) => r.id === 'section-header-quality')!;
+    // It should run (not skip) and analyze the tab groups
+    expect(shq.status).not.toBe('skip');
+    expect(shq.details?.analyses).toBeDefined();
+  });
+
+  it('section-header-quality passes when tabbed-content-serialization finds no tabs', async () => {
+    const host = 'dep-soft-notabs.local';
+    setupSite(host, {
+      llmsTxt: `# Docs\n## Links\n- [Page](http://${host}/docs/page): Page\n`,
+      pages: [{ path: '/docs/page' }],
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: ['llms-txt-exists', 'tabbed-content-serialization', 'section-header-quality'],
+    });
+
+    const tcs = report.results.find((r) => r.id === 'tabbed-content-serialization')!;
+    expect(tcs.status).toBe('pass');
+    expect(tcs.details?.totalGroupsFound).toBe(0);
+
+    // section-header-quality reports 'pass' (not 'skip') when there are no tabs to analyze
+    const shq = report.results.find((r) => r.id === 'section-header-quality')!;
+    expect(shq.status).toBe('pass');
+    expect(shq.message).toContain('No tabbed content');
+  });
+
+  it('llms-txt-exists fail cascades through all llms-txt-* checks', async () => {
+    const host = 'dep-cascade.local';
+    setupSite(host, { pages: [{ path: '/docs/page' }] });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'llms-txt-valid',
+        'llms-txt-size',
+        'llms-txt-links-resolve',
+        'llms-txt-links-markdown',
+        'llms-txt-coverage',
+      ],
+    });
+
+    expect(report.results.find((r) => r.id === 'llms-txt-exists')?.status).toBe('fail');
+
+    // All downstream checks should skip due to dependency failure
+    for (const id of [
+      'llms-txt-valid',
+      'llms-txt-size',
+      'llms-txt-links-resolve',
+      'llms-txt-links-markdown',
+      'llms-txt-coverage',
+    ]) {
+      const result = report.results.find((r) => r.id === id)!;
+      expect(result.status, `${id} should skip`).toBe('skip');
+      expect(result.message).toContain('dependency');
+    }
+  });
+});
diff --git a/test/integration/scoring-pipeline.test.ts b/test/integration/scoring-pipeline.test.ts
new file mode 100644
index 0000000..1c7aea6
--- /dev/null
+++ b/test/integration/scoring-pipeline.test.ts
@@ -0,0 +1,629 @@
+/**
+ * End-to-end scoring pipeline integration tests.
+ *
+ * These tests run real checks through the runner via runChecks(), then feed
+ * the results through computeScore() to validate that the full pipeline
+ * produces correct coefficients, diagnostics, caps, and category scores.
+ *
+ * Each test targets one specific scoring behavior using the minimum set of
+ * checks needed, matching the established pattern in check-pipeline.test.ts.
+ */
+import { describe, it, expect, beforeAll } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { runChecks } from '../../src/runner.js';
+import { computeScore } from '../../src/scoring/score.js';
+import '../../src/checks/index.js';
+
+const server = setupServer();
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' });
+  return () => server.close();
+});
+
+/**
+ * Set up a mock docs site. Handles llms.txt, page variants, sitemap,
+ * bad-URL 404s for http-status-codes, and HEAD handlers for link resolution.
+ */
+function setupSite(
+  host: string,
+  opts: {
+    llmsTxt?: string;
+    pages: Array<{
+      path: string;
+      html?: string;
+      md?: string;
+      contentNeg?: string;
+      status?: number;
+    }>;
+    sitemap?: string[];
+    cacheControl?: string;
+  },
+) {
+  const handlers = [];
+  const defaultCacheHeaders: Record<string, string> = {};
+  if (opts.cacheControl) defaultCacheHeaders['Cache-Control'] = opts.cacheControl;
+
+  // llms.txt
+  if (opts.llmsTxt) {
+    handlers.push(
+      http.get(
+        `http://${host}/llms.txt`,
+        () =>
+          new HttpResponse(opts.llmsTxt!, {
+            status: 200,
+            headers: { 'Content-Type': 'text/plain', ...defaultCacheHeaders },
+          }),
+      ),
+    );
+  } else {
+    handlers.push(
+      http.get(`http://${host}/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+    );
+  }
+  handlers.push(
+    http.get(`http://${host}/docs/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+  );
+
+  // Sitemap
+  if (opts.sitemap) {
+    const locs = opts.sitemap.map((u) => `<url><loc>${u}</loc></url>`).join('\n');
+    const xml = `<?xml version="1.0"?>\n<urlset>\n${locs}\n</urlset>`;
+    handlers.push(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(xml, {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+  } else {
+    handlers.push(
+      http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })),
+      http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })),
+    );
+  }
+
+  // Root URL for homepage-based discovery
+  const pageLinks = opts.pages
+    .map((p) => `<a href="http://${host}${p.path}">${p.path}</a>`)
+    .join('\n');
+  const rootHtml = `<html><body><h1>Docs Home</h1><p>Welcome to the documentation site. Browse the pages below for comprehensive guides and API references.</p>${pageLinks}</body></html>`;
+  handlers.push(
+    http.get(
+      `http://${host}/`,
+      () =>
+        new HttpResponse(rootHtml, {
+          status: 200,
+          headers: { 'Content-Type': 'text/html', ...defaultCacheHeaders },
+        }),
+    ),
+  );
+
+  // Prose long enough to pass content-start-position heuristics (>40 chars with punctuation)
+  const defaultProse =
+    'This is comprehensive documentation content explaining the feature in detail. ' +
+    'It covers configuration options, usage patterns, and troubleshooting steps. ' +
+    'Read through each section carefully for the best results.';
+
+  for (const page of opts.pages) {
+    const pageStatus = page.status ?? 200;
+    const defaultHtml = `<html><body><h1>Documentation</h1><p>${defaultProse}</p></body></html>`;
+
+    // GET handler (HTML or content-negotiation)
+    handlers.push(
+      http.get(`http://${host}${page.path}`, ({ request }) => {
+        const accept = request.headers.get('accept') ?? '';
+        if (accept.includes('text/markdown') && page.contentNeg) {
+          return new HttpResponse(page.contentNeg, {
+            status: pageStatus,
+            headers: { 'Content-Type': 'text/markdown', ...defaultCacheHeaders },
+          });
+        }
+        return new HttpResponse(page.html ?? defaultHtml, {
+          status: pageStatus,
+          headers: { 'Content-Type': 'text/html', ...defaultCacheHeaders },
+        });
+      }),
+      http.head(
+        `http://${host}${page.path}`,
+        () => new HttpResponse(null, { status: pageStatus, headers: defaultCacheHeaders }),
+      ),
+    );
+
+    // .md URL variants
+    if (page.md) {
+      for (const suffix of ['.md', '/index.md']) {
+        handlers.push(
+          http.get(
+            `http://${host}${page.path}${suffix}`,
+            () =>
+              new HttpResponse(page.md!, {
+                status: 200,
+                headers: { 'Content-Type': 'text/markdown', ...defaultCacheHeaders },
+              }),
+          ),
+          http.head(
+            `http://${host}${page.path}${suffix}`,
+            () =>
+              new HttpResponse(null, {
+                status: 200,
+                headers: { 'Content-Type': 'text/markdown', ...defaultCacheHeaders },
+              }),
+          ),
+        );
+      }
+    } else {
+      handlers.push(
+        http.get(`http://${host}${page.path}.md`, () => new HttpResponse(null, { status: 404 })),
+        http.get(
+          `http://${host}${page.path}/index.md`,
+          () => new HttpResponse(null, { status: 404 }),
+        ),
+      );
+    }
+
+    // Bad URL for http-status-codes (proper 404)
+    const badPath = `${page.path}-afdocs-nonexistent-8f3a`;
+    handlers.push(
+      http.get(`http://${host}${badPath}`, () => new HttpResponse('Not Found', { status: 404 })),
+    );
+  }
+
+  // Catch-all bad URL pattern for pages discovered via homepage fallback
+  handlers.push(
+    http.get(
+      `http://${host}/*afdocs-nonexistent*`,
+      () => new HttpResponse('Not Found', { status: 404 }),
+    ),
+  );
+
+  server.use(...handlers);
+}
+
+function makePages(
+  host: string,
+  count: number,
+  opts?: { md?: boolean; contentNeg?: boolean; directive?: boolean },
+) {
+  const pages = [];
+  const links = [];
+  const prose =
+    'This is comprehensive documentation content explaining the feature in detail. ' +
+    'It covers configuration, usage patterns, and troubleshooting. ';
+  for (let i = 0; i < count; i++) {
+    const path = `/docs/page-${i}`;
+    const directiveHtml = opts?.directive ? '<a href="/llms.txt">Documentation Index</a>' : '';
+    const directiveMd = opts?.directive
+      ? '> See [llms.txt](/llms.txt) for the documentation index.\n\n'
+      : '';
+    const html = `<html><body>${directiveHtml}<h1>Page ${i}</h1><p>${prose}</p></body></html>`;
+    const md = opts?.md ? `${directiveMd}# Page ${i}\n\n${prose}\n` : undefined;
+    const contentNeg = opts?.contentNeg ? `${directiveMd}# Page ${i}\n\n${prose}\n` : undefined;
+    pages.push({ path, html, md, contentNeg });
+    links.push(`- [Page ${i}](http://${host}${path}): Page ${i}`);
+  }
+  return { pages, links };
+}
+
+describe('scoring pipeline: discovery coefficient 0.8 tier', () => {
+  const host = 'score-disc-08.local';
+
+  it('directive passes without content-negotiation → coefficient 0.8 on markdown checks', async () => {
+    const { pages, links } = makePages(host, 6, { md: true, directive: true });
+    const llmsTxt = `# Docs\n\n> Index.\n\n## Links\n${links.join('\n')}\n`;
+
+    setupSite(host, { llmsTxt, pages, cacheControl: 'max-age=300' });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'llms-txt-directive-html',
+        'markdown-url-support',
+        'content-negotiation',
+        'page-size-markdown',
+        'markdown-code-fence-validity',
+      ],
+    });
+    const score = computeScore(report);
+
+    expect(report.results.find((r) => r.id === 'content-negotiation')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'llms-txt-directive-html')?.status).toBe('pass');
+
+    const mdSizeScore = score.checkScores['page-size-markdown'];
+    if (mdSizeScore) {
+      expect(mdSizeScore.coefficient).toBe(0.8);
+    }
+    const fenceScore = score.checkScores['markdown-code-fence-validity'];
+    if (fenceScore) {
+      expect(fenceScore.coefficient).toBe(0.8);
+    }
+  });
+});
+
+describe('scoring pipeline: discovery coefficient zeroes out markdown checks', () => {
+  const host = 'score-disc-zero.local';
+
+  it('no discovery signal → coefficient 0 on markdown quality checks', async () => {
+    const { pages, links } = makePages(host, 6);
+    const llmsTxt = `# Docs\n\n> Index.\n\n## Links\n${links.join('\n')}\n`;
+
+    setupSite(host, { llmsTxt, pages, cacheControl: 'max-age=300' });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'llms-txt-directive-html',
+        'markdown-url-support',
+        'content-negotiation',
+        'page-size-markdown',
+        'markdown-code-fence-validity',
+      ],
+    });
+    const score = computeScore(report);
+
+    expect(report.results.find((r) => r.id === 'content-negotiation')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'markdown-url-support')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'llms-txt-directive-html')?.status).toBe('fail');
+
+    for (const checkId of ['page-size-markdown', 'markdown-code-fence-validity']) {
+      const cs = score.checkScores[checkId];
+      if (cs) {
+        expect(cs.coefficient).toBe(0);
+        expect(cs.effectiveWeight).toBe(0);
+      }
+    }
+  });
+});
+
+describe('scoring pipeline: SPA diagnostics and HTML path coefficient', () => {
+  const host = 'score-spa.local';
+
+  it('SPA shells fire diagnostics and discount HTML path checks', async () => {
+    const { pages, links } = makePages(host, 6, { md: true });
+    const llmsTxt = `# Docs\n\n> Index.\n\n## Links\n${links.join('\n')}\n`;
+
+    const spaHtml =
+      '<html><head><style>' +
+      'x'.repeat(5000) +
+      '</style></head><body><div id="__next"></div></body></html>';
+    const spaPages = pages.map((p) => ({ ...p, html: spaHtml }));
+
+    setupSite(host, { llmsTxt, pages: spaPages, cacheControl: 'max-age=300' });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'markdown-url-support',
+        'content-negotiation',
+        'llms-txt-directive-html',
+        'rendering-strategy',
+        'page-size-html',
+      ],
+    });
+    const score = computeScore(report);
+
+    expect(report.results.find((r) => r.id === 'rendering-strategy')?.status).toBe('fail');
+    expect(score.diagnostics.find((d) => d.id === 'markdown-undiscoverable')).toBeDefined();
+    expect(score.diagnostics.find((d) => d.id === 'spa-shell-html-invalid')).toBeDefined();
+
+    const htmlSizeScore = score.checkScores['page-size-html'];
+    if (htmlSizeScore) {
+      expect(htmlSizeScore.coefficient).toBeLessThan(1.0);
+    }
+  });
+});
+
+describe('scoring pipeline: dependency skip propagation into scoring', () => {
+  const host = 'score-skip.local';
+
+  it('skipped checks are excluded from score computation', async () => {
+    const { pages } = makePages(host, 6);
+    setupSite(host, { pages, cacheControl: 'max-age=300' });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: ['llms-txt-exists', 'llms-txt-valid', 'llms-txt-size'],
+    });
+    const score = computeScore(report);
+
+    expect(report.results.find((r) => r.id === 'llms-txt-exists')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'llms-txt-valid')?.status).toBe('skip');
+    expect(report.results.find((r) => r.id === 'llms-txt-size')?.status).toBe('skip');
+
+    expect(score.checkScores['llms-txt-valid']).toBeUndefined();
+    expect(score.checkScores['llms-txt-size']).toBeUndefined();
+    expect(score.checkScores['llms-txt-exists']).toBeDefined();
+  });
+});
+
+describe('scoring pipeline: index truncation coefficient', () => {
+  const host = 'score-trunc.local';
+
+  it('large llms.txt reduces weight of downstream index quality checks', async () => {
+    const { pages, links } = makePages(host, 6);
+    // Pad with 500 filler link entries, each with a 200-char description, to exceed 100K
+    const longDesc = 'A'.repeat(200);
+    const filler = Array.from(
+      { length: 500 },
+      (_, i) => `- [Filler ${i}](http://${host}/docs/filler-${i}): ${longDesc}`,
+    ).join('\n');
+    const llmsTxt = `# Docs\n\n> Index.\n\n## Links\n${links.join('\n')}\n${filler}\n`;
+
+    setupSite(host, { llmsTxt, pages, cacheControl: 'max-age=300' });
+
+    // Only size-related checks — skip link resolution (would HEAD 500+ URLs)
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: ['llms-txt-exists', 'llms-txt-valid', 'llms-txt-size'],
+    });
+    const score = computeScore(report);
+
+    expect(report.results.find((r) => r.id === 'llms-txt-size')?.status).toBe('fail');
+    expect(score.diagnostics.find((d) => d.id === 'truncated-index')).toBeDefined();
+
+    const validScore = score.checkScores['llms-txt-valid'];
+    expect(validScore).toBeDefined();
+    expect(validScore.coefficient).toBeLessThan(1.0);
+    expect(validScore.coefficient).toBeGreaterThan(0);
+  });
+});
+
+describe('scoring pipeline: single-page fallback produces notApplicable scoring', () => {
+  const host = 'score-single.local';
+
+  it('page-level checks get notApplicable when only 1 page is discovered', async () => {
+    // No llms.txt, no sitemap → discovery falls back to [baseUrl], testedPages = 1
+    setupSite(host, {
+      pages: [{ path: '/' }],
+      cacheControl: 'max-age=300',
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: ['llms-txt-exists', 'rendering-strategy', 'page-size-html', 'http-status-codes'],
+    });
+    const score = computeScore(report);
+
+    expect(score.checkScores['llms-txt-exists'].scoreDisplayMode).toBe('numeric');
+
+    // Page-level checks should be notApplicable with only 1 discovered page
+    for (const checkId of ['rendering-strategy', 'page-size-html', 'http-status-codes']) {
+      const cs = score.checkScores[checkId];
+      if (cs) {
+        expect(cs.scoreDisplayMode, `${checkId} should be notApplicable`).toBe('notApplicable');
+      }
+    }
+
+    expect(score.diagnostics.find((d) => d.id === 'single-page-sample')).toBeDefined();
+  });
+});
+
+describe('scoring pipeline: resolutions populated for real check failures', () => {
+  const host = 'score-resolutions.local';
+
+  it('each failing check produces a resolution string', async () => {
+    const { pages } = makePages(host, 6);
+    setupSite(host, { pages, cacheControl: 'max-age=300' });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: ['llms-txt-exists', 'markdown-url-support', 'content-negotiation'],
+    });
+    const score = computeScore(report);
+
+    // All three should fail
+    for (const id of ['llms-txt-exists', 'markdown-url-support', 'content-negotiation']) {
+      expect(report.results.find((r) => r.id === id)?.status).toBe('fail');
+      expect(score.resolutions[id], `missing resolution for ${id}`).toBeDefined();
+      expect(score.resolutions[id].length).toBeGreaterThan(0);
+    }
+  });
+});
+
+describe('scoring pipeline: category scores from mixed results', () => {
+  const host = 'score-categories.local';
+
+  it('computes per-category scores from a realistic mixed run', async () => {
+    const { pages, links } = makePages(host, 8, { md: true, contentNeg: true, directive: true });
+    const llmsTxt = `# Docs\n\n> Complete documentation index.\n\n## Links\n${links.join('\n')}\n`;
+
+    setupSite(host, {
+      llmsTxt,
+      pages,
+      sitemap: pages.map((p) => `http://${host}${p.path}`),
+      cacheControl: 'max-age=300',
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'llms-txt-valid',
+        'llms-txt-size',
+        'markdown-url-support',
+        'content-negotiation',
+        'rendering-strategy',
+        'page-size-html',
+        'page-size-markdown',
+        'http-status-codes',
+        'auth-gate-detection',
+        'cache-header-hygiene',
+      ],
+    });
+    const score = computeScore(report);
+
+    // content-discoverability should be high (llms-txt-exists, valid, size all pass)
+    expect(score.categoryScores['content-discoverability']).toBeDefined();
+    expect(score.categoryScores['content-discoverability'].score).toBeGreaterThanOrEqual(80);
+
+    // markdown-availability should pass (both CN and md-url pass)
+    expect(score.categoryScores['markdown-availability']).toBeDefined();
+    expect(score.categoryScores['markdown-availability'].score).toBe(100);
+
+    // Each category should have a grade
+    for (const [, catScore] of Object.entries(score.categoryScores)) {
+      if (catScore.score !== null) {
+        expect(catScore.grade).toBeDefined();
+      }
+    }
+  });
+});
+
+describe('scoring pipeline: no-viable-path diagnostic', () => {
+  const host = 'score-novpath.local';
+  // Feeds a runChecks report into computeScore and asserts on the diagnostics it produces.
+  it('fires when no llms.txt, SPA shells, and no markdown', async () => {
+    const spaHtml =
+      '<html><head><style>' +
+      'x'.repeat(5000) +
+      '</style></head><body><div id="__next"></div></body></html>';
+    // spaHtml: 5000 chars of <style> filler and a body holding only an empty #__next div.
+    const pageUrls = Array.from({ length: 6 }, (_, i) => `http://${host}/docs/page-${i}`);
+    // No llmsTxt option; all six /docs pages serve spaHtml and are also passed as the sitemap.
+    setupSite(host, {
+      pages: pageUrls.map((url) => ({
+        path: new URL(url).pathname,
+        html: spaHtml,
+      })),
+      sitemap: pageUrls,
+      cacheControl: 'max-age=300',
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'rendering-strategy',
+        'markdown-url-support',
+        'content-negotiation',
+        'llms-txt-directive-html',
+      ],
+    });
+    const score = computeScore(report);
+    // Preconditions: the three underlying checks must each report 'fail'.
+    expect(report.results.find((r) => r.id === 'llms-txt-exists')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'rendering-strategy')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'markdown-url-support')?.status).toBe('fail');
+    // The scorer must then emit 'no-viable-path' at 'critical' severity.
+    const nvp = score.diagnostics.find((d) => d.id === 'no-viable-path');
+    expect(nvp).toBeDefined();
+    expect(nvp!.severity).toBe('critical');
+  });
+});
+
+describe('scoring pipeline: auth-no-alternative diagnostic', () => {
+  const host = 'score-authno.local';
+  // Feeds a runChecks report for an auth-gated site into computeScore and checks diagnostics.
+  it('fires when all pages are auth-gated with no alternative access', async () => {
+    const pageUrls = Array.from({ length: 6 }, (_, i) => `http://${host}/docs/page-${i}`);
+
+    // Site-level routes: both llms.txt paths 404; robots.txt, sitemap.xml and the root return 200.
+    const handlers = [
+      http.get(`http://${host}/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+      http.get(`http://${host}/docs/llms.txt`, () => new HttpResponse(null, { status: 404 })),
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(`http://${host}/sitemap.xml`, () => {
+        const locs = pageUrls.map((u) => `<url><loc>${u}</loc></url>`).join('\n');
+        return new HttpResponse(`<?xml version="1.0"?>\n<urlset>\n${locs}\n</urlset>`, {
+          status: 200,
+          headers: { 'content-type': 'application/xml' },
+        });
+      }),
+      http.get(
+        `http://${host}/`,
+        () =>
+          new HttpResponse('<html><body><h1>Login Required</h1></body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    ];
+    // Every docs page answers 401 to GET and HEAD; its .md and /index.md variants answer 404.
+    for (const url of pageUrls) {
+      const path = new URL(url).pathname;
+      handlers.push(
+        http.get(`http://${host}${path}`, () => new HttpResponse('Unauthorized', { status: 401 })),
+        http.head(`http://${host}${path}`, () => new HttpResponse(null, { status: 401 })),
+        http.get(`http://${host}${path}.md`, () => new HttpResponse(null, { status: 404 })),
+        http.get(`http://${host}${path}/index.md`, () => new HttpResponse(null, { status: 404 })),
+      );
+    }
+
+    server.use(...handlers);
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'auth-gate-detection',
+        'auth-alternative-access',
+        'markdown-url-support',
+        'content-negotiation',
+      ],
+    });
+    const score = computeScore(report);
+    // Preconditions: both auth checks must report 'fail'.
+    expect(report.results.find((r) => r.id === 'auth-gate-detection')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'auth-alternative-access')?.status).toBe('fail');
+    // The scorer must then emit 'auth-no-alternative' at 'critical' severity.
+    const authDiag = score.diagnostics.find((d) => d.id === 'auth-no-alternative');
+    expect(authDiag).toBeDefined();
+    expect(authDiag!.severity).toBe('critical');
+  });
+});
+
+describe('scoring pipeline: page-size-no-markdown-escape diagnostic', () => {
+  const host = 'score-bightml.local';
+  // Feeds a runChecks report for a site with very large HTML pages into computeScore.
+  it('fires when HTML pages are oversized and no markdown path exists', async () => {
+    const bigContent = 'x'.repeat(110_000);
+    const bigHtml = `<html><body><h1>Docs</h1><p>${bigContent}</p></body></html>`;
+    const { links } = makePages(host, 6);
+    const llmsTxt = `# Docs\n\n> Index.\n\n## Links\n${links.join('\n')}\n`;
+
+    // Only the links from makePages are reused; the six pages are rebuilt here with bigHtml.
+    const pages = Array.from({ length: 6 }, (_, i) => ({
+      path: `/docs/page-${i}`,
+      html: bigHtml,
+    }));
+    // llmsTxt is supplied, and each page is configured with a path and html only.
+    setupSite(host, {
+      llmsTxt,
+      pages,
+      cacheControl: 'max-age=300',
+    });
+
+    const report = await runChecks(`http://${host}`, {
+      requestDelay: 0,
+      checkIds: [
+        'llms-txt-exists',
+        'markdown-url-support',
+        'content-negotiation',
+        'llms-txt-directive-html',
+        'page-size-html',
+      ],
+    });
+    const score = computeScore(report);
+    // Preconditions: page-size-html and markdown-url-support must both report 'fail'.
+    expect(report.results.find((r) => r.id === 'page-size-html')?.status).toBe('fail');
+    expect(report.results.find((r) => r.id === 'markdown-url-support')?.status).toBe('fail');
+    // The scorer must then emit 'page-size-no-markdown-escape' at 'warning' severity.
+    const diag = score.diagnostics.find((d) => d.id === 'page-size-no-markdown-escape');
+    expect(diag).toBeDefined();
+    expect(diag!.severity).toBe('warning');
+  });
+});
diff --git a/test/unit/checks/llms-txt-coverage.test.ts b/test/unit/checks/llms-txt-coverage.test.ts
index 72412fb..aca7a4f 100644
--- a/test/unit/checks/llms-txt-coverage.test.ts
+++ b/test/unit/checks/llms-txt-coverage.test.ts
@@ -797,6 +797,170 @@ describe('filterToUnprefixedLocale', () => {
   });
 });
 
+describe('edge cases', () => {
+  test('warns when passThreshold < warnThreshold', async () => {
+    const host = 'cov-threshold-warn.local';
+    const allPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
+    const llmsPages = allPages.slice(0, 9);
+    // makeCtx gets 9 of the 10 pages; the pass threshold (50) is then set below warn (80).
+    const ctx = makeCtx(host, llmsPages, '/docs');
+    ctx.options.coveragePassThreshold = 50;
+    ctx.options.coverageWarnThreshold = 80;
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(allPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+    // The inverted thresholds must be reported through details.thresholdWarnings.
+    const result = await check.run(ctx);
+    expect(result.details?.thresholdWarnings).toBeDefined();
+    const warnings = result.details?.thresholdWarnings as string[];
+    expect(warnings[0]).toContain('warn state is unreachable');
+  });
+
+  test('handles malformed URLs in sitemap gracefully', async () => {
+    const host = 'cov-malformed.local';
+    const goodPages = [`http://${host}/docs/guide`];
+
+    const ctx = makeCtx(host, goodPages, '/docs');
+    // Sitemap with one valid <loc> and one <loc> whose value is not a URL.
+    const sitemapXml = `<?xml version="1.0"?>
+<urlset>
+<url><loc>http://${host}/docs/guide</loc></url>
+<url><loc>not-a-valid-url</loc></url>
+</urlset>`;
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(sitemapXml, {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+    // The bad entry must not break the run: the one valid page still yields 100% coverage.
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('handles malformed URLs in llms.txt unmatched link check', async () => {
+    const host = 'cov-malformed-llms.local';
+    const sitemapPages = [`http://${host}/docs/guide`];
+    // llms.txt with one valid link and one link whose target is not a URL.
+    const llmsTxt = [
+      '# Docs\n',
+      `- [Guide](http://${host}/docs/guide)`,
+      '- [Bad](not-a-valid-url)',
+    ].join('\n');
+    // Built with createContext (not makeCtx), seeding llms-txt-exists with the raw text above.
+    const baseUrl = `http://${host}/docs`;
+    const ctx = createContext(baseUrl, { requestDelay: 0 });
+    const discovered: DiscoveredFile[] = [
+      { url: `http://${host}/llms.txt`, content: llmsTxt, status: 200, redirected: false },
+    ];
+    ctx.previousResults.set('llms-txt-exists', {
+      id: 'llms-txt-exists',
+      category: 'content-discoverability',
+      status: 'pass',
+      message: 'Found',
+      details: { discoveredFiles: discovered },
+    });
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+    // The malformed link must be tolerated: status stays 'pass' with 100% coverage.
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    expect(result.details?.coverageRate).toBe(100);
+  });
+
+  test('does not count llms.txt links outside baseUrl path as unmatched', async () => {
+    const host = 'cov-outside-base.local';
+    const sitemapPages = [`http://${host}/docs/guide`];
+    // llms.txt links include one outside the /docs base path
+    const llmsPages = [`http://${host}/docs/guide`, `http://${host}/other/page`];
+
+    const ctx = makeCtx(host, llmsPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    // /other/page is outside /docs base path, so not counted as unmatched
+    expect(result.details?.unmatchedCount).toBe(0);
+  });
+
+  test('excludes blog/pricing from unmatched llms.txt link check', async () => {
+    const host = 'cov-exclude-unmatched.local';
+    const sitemapPages = [`http://${host}/docs/guide`];
+    const llmsPages = [`http://${host}/docs/guide`, `http://${host}/docs/blog/post-1`];
+    // NOTE(review): only a /docs/blog/ link is exercised here; no pricing URL is included.
+    const ctx = makeCtx(host, llmsPages, '/docs');
+
+    server.use(
+      http.get(
+        `http://${host}/robots.txt`,
+        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
+      ),
+      http.get(
+        `http://${host}/sitemap.xml`,
+        () =>
+          new HttpResponse(makeSitemap(sitemapPages), {
+            status: 200,
+            headers: { 'content-type': 'application/xml' },
+          }),
+      ),
+    );
+
+    const result = await check.run(ctx);
+    expect(result.status).toBe('pass');
+    // /docs/blog/post-1 is excluded by built-in patterns, not counted as unmatched
+    expect(result.details?.unmatchedCount).toBe(0);
+  });
+});
+
 describe('configurable thresholds', () => {
   test('uses custom pass threshold', async () => {
     const host = 'cov-custom-pass.local';
diff --git a/test/unit/checks/llms-txt-directive-html.test.ts b/test/unit/checks/llms-txt-directive-html.test.ts
index 871f72b..d9085af 100644
--- a/test/unit/checks/llms-txt-directive-html.test.ts
+++ b/test/unit/checks/llms-txt-directive-html.test.ts
@@ -283,6 +283,79 @@ describe('llms-txt-directive-html', () => {
     expect(result.details?.foundCount).toBe(1);
   });
 
+  it('detects HTML from content body when content-type is not text/html', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><body><a href="/llms.txt">Index</a><p>Content</p></body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/plain' } },
+          ),
+      ),
+    );
+    // The body is HTML but labelled text/plain; its llms.txt link must still be counted.
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('pass');
+    expect(result.details?.foundCount).toBe(1);
+  });
+
+  it('skips non-HTML content that does not start with <', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse('# Markdown content\n\nSee llms.txt', {
+            status: 200,
+            headers: { 'Content-Type': 'text/plain' },
+          }),
+      ),
+    );
+    // A text/plain markdown body: its "llms.txt" mention must not be counted as a directive.
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.foundCount).toBe(0);
+  });
+
+  it('reports fetch errors alongside successful tests in the suffix', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><body><a href="/llms.txt">Index</a><p>Content</p></body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
+          ),
+      ),
+      http.get('http://test.local/docs/page2', () => HttpResponse.error()),
+    );
+    // page1 loads and page2 is a network error; the message must mention the failed fetch.
+    const result = await check.run(makeCtx(llms('/docs/page1', '/docs/page2')));
+    expect(result.details?.fetchErrors).toBe(1);
+    expect(result.message).toContain('1 failed to fetch');
+  });
+
+  it('passes without "near the top" when directive is mid-page', async () => {
+    // Directive at ~20% of page: past the 10% TOP_THRESHOLD but before the 50% DEEP_THRESHOLD
+    const before = '<p>Some filler content here.</p>'.repeat(10);
+    const after = '<p>More documentation content follows.</p>'.repeat(40);
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(
+            `<html><body>${before}<a href="/llms.txt">Index</a>${after}</body></html>`,
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
+          ),
+      ),
+    );
+    // A mid-page directive still passes, and the message carries neither position qualifier.
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('pass');
+    expect(result.message).not.toContain('near the top');
+    expect(result.message).not.toContain('buried');
+  });
+
   it('detects text mention of llms.txt in content area (outside nav)', async () => {
     server.use(
       http.get(
diff --git a/test/unit/checks/llms-txt-directive-md.test.ts b/test/unit/checks/llms-txt-directive-md.test.ts
index d225be4..8d975b3 100644
--- a/test/unit/checks/llms-txt-directive-md.test.ts
+++ b/test/unit/checks/llms-txt-directive-md.test.ts
@@ -247,6 +247,85 @@ describe('llms-txt-directive-md', () => {
     expect(result.details?.fetchErrors).toBe(1);
   });
 
+  it('skips content negotiation when content-type is not text/markdown', async () => {
+    server.use(
+      http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse('<html><body>llms.txt mentioned here</body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+    );
+    // Both .md URLs 404 and the page itself is text/html: expected to count as a fetch error.
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.fetchErrors).toBe(1);
+  });
+
+  it('skips content negotiation when response body is empty', async () => {
+    server.use(
+      http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse('   ', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+    // Both .md URLs 404 and the text/markdown body is whitespace only: counts as a fetch error.
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.fetchErrors).toBe(1);
+  });
+
+  it('skips content negotiation when response is not ok', async () => {
+    server.use(
+      http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+      http.get('http://test.local/docs/page1', () => new HttpResponse('', { status: 500 })),
+    );
+    // Both .md URLs 404 and the page itself returns 500: expected to count as a fetch error.
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.fetchErrors).toBe(1);
+  });
+
+  it('passes without "near the top" when directive is mid-page', async () => {
+    const before = 'Lorem ipsum content paragraph.\n\n'.repeat(15);
+    const after = 'More documentation content here.\n\n'.repeat(80);
+    server.use(
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse(`# Docs\n\n${before}See llms.txt for index.\n\n${after}`, {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+    // Directive sits between 15 and 80 filler paragraphs; no position qualifier is expected.
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('pass');
+    expect(result.message).not.toContain('near the top');
+    expect(result.message).not.toContain('buried');
+  });
+
   it('handles curated .md pages', async () => {
     server.use(
       http.get(
diff --git a/test/unit/checks/redirect-behavior.test.ts b/test/unit/checks/redirect-behavior.test.ts
index fb02bc1..a9184b7 100644
--- a/test/unit/checks/redirect-behavior.test.ts
+++ b/test/unit/checks/redirect-behavior.test.ts
@@ -295,4 +295,21 @@ describe('redirect-behavior', () => {
     expect(result.status).toBe('warn');
     expect(result.details?.crossHostCount).toBe(1);
   });
+
+  it('classifies as no-redirect when 3xx lacks Location header', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/no-location',
+        () =>
+          new HttpResponse(null, {
+            status: 301,
+          }),
+        { once: true }, // presumably msw's one-shot handler option: used for a single request
+      ),
+    );
+    // The response is a 301 with no headers at all, so there is no Location to follow.
+    const result = await check.run(makeCtx(llms('/docs/no-location')));
+    const pages = result.details?.pageResults as Array<{ classification: string }>;
+    expect(pages[0].classification).toBe('no-redirect');
+  });
 });
diff --git a/test/unit/checks/tabbed-content-serialization.test.ts b/test/unit/checks/tabbed-content-serialization.test.ts
index c34a3c0..d4a9594 100644
--- a/test/unit/checks/tabbed-content-serialization.test.ts
+++ b/test/unit/checks/tabbed-content-serialization.test.ts
@@ -353,6 +353,191 @@ describe('tabbed-content-serialization', () => {
     expect(tabbedPages[0].source).toBe('html');
   });
 
+  it('SPA shell: md fallback returns markdown with no tabs', async () => {
+    const spaHtml =
+      '<html><head><style>' +
+      'x'.repeat(15_000) +
+      '</style></head><body><div id="___gatsby"></div></body></html>';
+    // HTML is 15,000 chars of <style> plus an empty #___gatsby div; the .md has no tab markup.
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse('# Just a page\n\nNo tabs here at all.', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+    // rendering-strategy is pre-seeded below as 'fail' for page1.
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://test.local/docs/page1): First\n`;
+    const ctx = makeCtx(content);
+    ctx.previousResults.set('rendering-strategy', {
+      id: 'rendering-strategy',
+      category: 'page-size',
+      status: 'fail',
+      message: 'SPA shell detected',
+      details: {
+        pageResults: [{ url: 'http://test.local/docs/page1', status: 'fail' }],
+      },
+    });
+    const result = await check.run(ctx);
+    expect(result.details?.totalGroupsFound).toBe(0);
+    const tabbedPages = result.details?.tabbedPages as Array<{ source: string }>;
+    expect(tabbedPages[0].source).toBe('html');
+  });
+
+  it('SPA shell: md fallback skips wrong content-type', async () => {
+    const spaHtml =
+      '<html><head><style>' +
+      'x'.repeat(15_000) +
+      '</style></head><body><div id="___gatsby"></div></body></html>';
+    // The .md response contains a <Tabs> group but is served as application/octet-stream.
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse('<Tabs><Tab name="A">A</Tab></Tabs>', {
+            status: 200,
+            headers: { 'Content-Type': 'application/octet-stream' },
+          }),
+      ),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+    );
+    // With rendering-strategy seeded as 'fail', that .md body must contribute no tab groups.
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://test.local/docs/page1): First\n`;
+    const ctx = makeCtx(content);
+    ctx.previousResults.set('rendering-strategy', {
+      id: 'rendering-strategy',
+      category: 'page-size',
+      status: 'fail',
+      message: 'SPA',
+      details: { pageResults: [{ url: 'http://test.local/docs/page1', status: 'fail' }] },
+    });
+    const result = await check.run(ctx);
+    expect(result.details?.totalGroupsFound).toBe(0);
+  });
+
+  it('SPA shell: md fallback skips HTML body disguised as markdown', async () => {
+    const spaHtml =
+      '<html><head><style>' +
+      'x'.repeat(15_000) +
+      '</style></head><body><div id="___gatsby"></div></body></html>';
+    // The .md URL returns a full HTML document labelled text/plain; /index.md returns 404.
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse('<!DOCTYPE html><html><body>not markdown</body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/plain' },
+          }),
+      ),
+      http.get(
+        'http://test.local/docs/page1/index.md',
+        () => new HttpResponse('', { status: 404 }),
+      ),
+    );
+    // With rendering-strategy seeded as 'fail', that HTML body must contribute no tab groups.
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://test.local/docs/page1): First\n`;
+    const ctx = makeCtx(content);
+    ctx.previousResults.set('rendering-strategy', {
+      id: 'rendering-strategy',
+      category: 'page-size',
+      status: 'fail',
+      message: 'SPA',
+      details: { pageResults: [{ url: 'http://test.local/docs/page1', status: 'fail' }] },
+    });
+    const result = await check.run(ctx);
+    expect(result.details?.totalGroupsFound).toBe(0);
+  });
+
+  it('SPA shell: md fallback handles fetch error gracefully', async () => {
+    const spaHtml =
+      '<html><head><style>' +
+      'x'.repeat(15_000) +
+      '</style></head><body><div id="___gatsby"></div></body></html>';
+    // The page itself loads, but both markdown URLs respond with HttpResponse.error().
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(spaHtml, {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          }),
+      ),
+      http.get('http://test.local/docs/page1.md', () => HttpResponse.error()),
+      http.get('http://test.local/docs/page1/index.md', () => HttpResponse.error()),
+    );
+    // The check must still resolve and report zero tab groups.
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://test.local/docs/page1): First\n`;
+    const ctx = makeCtx(content);
+    ctx.previousResults.set('rendering-strategy', {
+      id: 'rendering-strategy',
+      category: 'page-size',
+      status: 'fail',
+      message: 'SPA',
+      details: { pageResults: [{ url: 'http://test.local/docs/page1', status: 'fail' }] },
+    });
+    const result = await check.run(ctx);
+    expect(result.details?.totalGroupsFound).toBe(0);
+  });
+
+  it('does not attempt md fallback when rendering-strategy is pass', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><body><h1>Hello</h1><p>Server-rendered content.</p></body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
+          ),
+      ),
+    );
+    // rendering-strategy is seeded as 'pass'; this test registers no .md handlers for page1.
+    const content = `# Docs\n> Summary\n## Links\n- [Page 1](http://test.local/docs/page1): First\n`;
+    const ctx = makeCtx(content);
+    ctx.previousResults.set('rendering-strategy', {
+      id: 'rendering-strategy',
+      category: 'page-size',
+      status: 'pass',
+      message: 'All server-rendered',
+      details: { pageResults: [{ url: 'http://test.local/docs/page1', status: 'pass' }] },
+    });
+    const result = await check.run(ctx);
+    expect(result.details?.totalGroupsFound).toBe(0);
+    const tabbedPages = result.details?.tabbedPages as Array<{ source: string }>;
+    expect(tabbedPages[0].source).toBe('html');
+  });
+
   it('does not try .md fallback for non-SPA HTML', async () => {
     // Regular server-rendered HTML with no tabs
     server.use(
diff --git a/test/unit/cli/formatters.test.ts b/test/unit/cli/formatters.test.ts
index 8bd304d..4e97dae 100644
--- a/test/unit/cli/formatters.test.ts
+++ b/test/unit/cli/formatters.test.ts
@@ -830,26 +830,807 @@ describe('formatText', () => {
       expect(output).not.toContain('https://example.com/good');
     });
 
-    it('does not show details without verbose flag', () => {
+    it('shows missing directives for llms-txt-directive-md', () => {
       const report = makeReport({
         results: [
           {
-            id: 'content-start-position',
+            id: 'llms-txt-directive-md',
+            category: 'content-discoverability',
+            status: 'warn',
+            message: 'Directive found on some pages',
+            details: {
+              pageResults: [
+                { url: 'https://example.com/page1', found: false },
+                { url: 'https://example.com/page2', found: true, positionPercent: 3 },
+                { url: 'https://example.com/page3', found: true, positionPercent: 72 },
+                {
+                  url: 'https://example.com/page4',
+                  found: false,
+                  error: 'No markdown version available',
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      // page1: not found, no error
+      expect(output).toContain('https://example.com/page1');
+      expect(output).toContain('no directive found');
+      // page2: directive found at 3% of the page; its URL must be absent from the output
+      expect(output).not.toContain('https://example.com/page2');
+      // page3: directive found at 72% of the page; listed together with that position
+      expect(output).toContain('https://example.com/page3');
+      expect(output).toContain('directive at 72% of page');
+      // page4: error
+      expect(output).toContain('https://example.com/page4');
+      expect(output).toContain('No markdown version available');
+    });
+
+    it('shows error details in llms-txt-directive-html', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'llms-txt-directive-html',
+            category: 'content-discoverability',
+            status: 'fail',
+            message: 'Could not test',
+            details: {
+              pageResults: [{ url: 'https://example.com/err', found: false, error: 'HTTP 500' }],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('https://example.com/err'); // URL of the page that errored
+      expect(output).toContain('HTTP 500'); // that page's error string from the fixture
+    });
+
+    it('shows error details for rendering-strategy pages', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'rendering-strategy',
             category: 'page-size',
+            status: 'fail',
+            message: 'Issues',
+            details: {
+              pageResults: [
+                {
+                  url: 'https://example.com/timeout',
+                  status: 'fail',
+                  error: 'Request timed out', // asserted verbatim in the verbose output below
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('https://example.com/timeout');
+      expect(output).toContain('Request timed out');
+    });
+
+    it('shows error details for redirect-behavior pages', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'redirect-behavior',
+            category: 'url-stability',
+            status: 'fail',
+            message: 'Errors',
+            details: {
+              pageResults: [
+                {
+                  url: 'https://example.com/err',
+                  classification: 'fetch-error',
+                  error: 'ECONNREFUSED', // asserted verbatim in the verbose output below
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('ECONNREFUSED');
+    });
+
+    it('shows error details for auth-gate-detection pages', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'auth-gate-detection',
+            category: 'authentication',
+            status: 'fail',
+            message: 'Issues',
+            details: {
+              pageResults: [
+                {
+                  url: 'https://example.com/err',
+                  classification: 'auth-required',
+                  error: 'Connection reset', // asserted verbatim in the verbose output below
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('Connection reset');
+    });
+
+    it('shows error details for tabbed-content-serialization pages', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'tabbed-content-serialization',
+            category: 'content-structure',
+            status: 'fail',
+            message: 'Issues',
+            details: {
+              tabbedPages: [
+                {
+                  url: 'https://example.com/err',
+                  status: 'fail',
+                  error: 'Parse error', // asserted verbatim in the verbose output below
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('Parse error');
+    });
+
+    it('shows error details for markdown-content-parity pages', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'markdown-content-parity',
+            category: 'observability',
+            status: 'fail',
+            message: 'Issues',
+            details: {
+              pageResults: [
+                {
+                  url: 'https://example.com/err',
+                  status: 'fail',
+                  error: 'Fetch failed', // asserted verbatim in the verbose output below
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('Fetch failed');
+    });
+
+    it('shows parity page with missing missingPercent as "content differs"', () => {
+      const report = makeReport({
+        results: [
+          {
+            id: 'markdown-content-parity',
+            category: 'observability',
             status: 'warn',
-            message: 'Issues found',
+            message: 'Drift',
             details: {
               pageResults: [
-                { url: 'https://example.com/page1', contentStartPercent: 42, status: 'warn' },
+                {
+                  url: 'https://example.com/drift',
+                  status: 'warn', // no missingPercent field is set on this page
+                },
               ],
             },
           },
         ],
         summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
       });
-      const output = formatText(report);
-      expect(output).not.toContain('https://example.com/page1');
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('content differs');
+    });
+
+    it('shows no-store for cache-header-hygiene', () => { // endpoint flagged with noStore should be labelled in verbose output
+      const report = makeReport({
+        results: [
+          {
+            id: 'cache-header-hygiene',
+            category: 'observability',
+            status: 'warn',
+            message: 'Issues',
+            details: {
+              endpointResults: [
+                {
+                  url: 'https://example.com/api',
+                  status: 'warn',
+                  noStore: true, // the flag under test; no effectiveMaxAge is given
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('no-store');
+    });
+
+    it('shows error details for cache-header-hygiene', () => { // a failed endpoint entry carries an error string
+      const report = makeReport({
+        results: [
+          {
+            id: 'cache-header-hygiene',
+            category: 'observability',
+            status: 'fail',
+            message: 'Issues',
+            details: {
+              endpointResults: [
+                {
+                  url: 'https://example.com/err',
+                  status: 'fail',
+                  error: 'DNS resolution failed', // the text asserted on below
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('DNS resolution failed');
+    });
+
+    it('shows hours for cache-header-hygiene max-age', () => { // a 7200 max-age is expected to be printed in hours
+      const report = makeReport({
+        results: [
+          {
+            id: 'cache-header-hygiene',
+            category: 'observability',
+            status: 'warn',
+            message: 'Issues',
+            details: {
+              endpointResults: [
+                {
+                  url: 'https://example.com/md',
+                  status: 'warn',
+                  effectiveMaxAge: 7200, // expected to render as "2h"
+                  noStore: false,
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('max-age 2h');
+    });
+
+    it('shows seconds for short cache-header-hygiene max-age', () => { // a 300 max-age is expected to stay in seconds
+      const report = makeReport({
+        results: [
+          {
+            id: 'cache-header-hygiene',
+            category: 'observability',
+            status: 'warn',
+            message: 'Issues',
+            details: {
+              endpointResults: [
+                {
+                  url: 'https://example.com/fast',
+                  status: 'warn',
+                  effectiveMaxAge: 300, // expected to render as "300s"
+                  noStore: false,
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('max-age 300s');
+    });
+
+    it('shows http-status-codes error details', () => { // page entry has both a classification and an error string
+      const report = makeReport({
+        results: [
+          {
+            id: 'http-status-codes',
+            category: 'url-stability',
+            status: 'fail',
+            message: 'Issues',
+            details: {
+              pageResults: [
+                {
+                  url: 'https://example.com/page',
+                  testUrl: 'https://example.com/page/nonexistent',
+                  classification: 'soft-404',
+                  status: 200,
+                  error: 'Connection timeout', // the text asserted on below
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('Connection timeout');
     });
+
+    it('shows soft-404 without bodyHint', () => { // the page entry below has no bodyHint field
+      const report = makeReport({
+        results: [
+          {
+            id: 'http-status-codes',
+            category: 'url-stability',
+            status: 'fail',
+            message: 'Issues',
+            details: {
+              pageResults: [
+                {
+                  url: 'https://example.com/page',
+                  testUrl: 'https://example.com/page/nonexistent',
+                  classification: 'soft-404',
+                  status: 200, // numeric status expected to appear in the rendered line
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('HTTP 200 instead of 404');
+    });
+
+    it('shows section-header without framework info', () => { // the analysis entry below has no framework field
+      const report = makeReport({
+        results: [
+          {
+            id: 'section-header-quality',
+            category: 'content-structure',
+            status: 'warn',
+            message: 'Generic headers',
+            details: {
+              analyses: [
+                {
+                  url: 'https://example.com/tabs',
+                  genericHeaders: 6, // numerator of the expected "6/8 generic"
+                  totalHeaders: 8, // denominator of the expected "6/8 generic"
+                  hasGenericMajority: true,
+                },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+      });
+      const output = formatText(report, { verbose: true });
+      expect(output).toContain('6/8 generic');
+      expect(output).not.toContain('(undefined)'); // a missing value must not be printed as "(undefined)"
+    });
+
+    it('does not show details without verbose flag', () => { // same kind of report as the verbose tests, rendered with default options
+      const report = makeReport({
+        results: [
+          {
+            id: 'content-start-position',
+            category: 'page-size',
+            status: 'warn',
+            message: 'Issues found',
+            details: {
+              pageResults: [
+                { url: 'https://example.com/page1', contentStartPercent: 42, status: 'warn' },
+              ],
+            },
+          },
+        ],
+        summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+      });
+      const output = formatText(report); // no options object, so verbose is not requested
+      expect(output).not.toContain('https://example.com/page1');
+    });
+  });
+});
+
+describe('formatText verbose null-guard branches', () => { // most cases pass `details: {}` and assert only that the message still renders
+  it('returns empty when cache-header-hygiene has no endpointResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'cache-header-hygiene',
+          category: 'observability',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no endpointResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when section-header-quality has no analyses', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'section-header-quality',
+          category: 'content-structure',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no analyses key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('uses p.url when testUrl is absent for http-status-codes', () => { // unlike its neighbours, this case supplies a populated pageResults array
+    const report = makeReport({
+      results: [
+        {
+          id: 'http-status-codes',
+          category: 'url-stability',
+          status: 'fail',
+          message: 'Soft 404s',
+          details: {
+            pageResults: [
+              {
+                url: 'https://example.com/page',
+                classification: 'soft-404', // entry has url but no testUrl
+                status: 200,
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/page');
+    expect(output).toContain('HTTP 200 instead of 404');
+  });
+
+  it('returns empty when page-size-html has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'page-size-html',
+          category: 'page-size',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when page-size-markdown has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'page-size-markdown',
+          category: 'page-size',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when content-start-position has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'content-start-position',
+          category: 'page-size',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when markdown-url-support has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'markdown-url-support',
+          category: 'markdown-availability',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when content-negotiation has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'content-negotiation',
+          category: 'markdown-availability',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when markdown-code-fence-validity has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'markdown-code-fence-validity',
+          category: 'content-structure',
+          status: 'fail',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when llms-txt-links-resolve has no broken array', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'llms-txt-links-resolve',
+          category: 'content-discoverability',
+          status: 'fail',
+          message: 'Issues',
+          details: {}, // no broken key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when rendering-strategy has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'rendering-strategy',
+          category: 'page-size',
+          status: 'fail',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when redirect-behavior has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'redirect-behavior',
+          category: 'url-stability',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when auth-gate-detection has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'auth-gate-detection',
+          category: 'authentication',
+          status: 'fail',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when llms-txt-directive-html has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'llms-txt-directive-html',
+          category: 'content-discoverability',
+          status: 'fail',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when llms-txt-directive-md has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'llms-txt-directive-md',
+          category: 'content-discoverability',
+          status: 'fail',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when tabbed-content-serialization has no tabbedPages', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'tabbed-content-serialization',
+          category: 'content-structure',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no tabbedPages key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when markdown-content-parity has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'markdown-content-parity',
+          category: 'observability',
+          status: 'warn',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+
+  it('returns empty when http-status-codes has no pageResults', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'http-status-codes',
+          category: 'url-stability',
+          status: 'fail',
+          message: 'Issues',
+          details: {}, // no pageResults key
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Issues');
+  });
+});
+
+describe('formatText edge cases', () => { // unusual inputs: unparseable timestamp, out-of-union status, spec-link visibility, unhandled check id
+  it('handles invalid timestamp gracefully', () => {
+    const report = makeReport({ timestamp: 'not-a-real-date' });
+    const output = formatText(report);
+    expect(output).toContain('not-a-real-date'); // the raw string is expected to appear unchanged
+  });
+
+  it('renders unknown status with fallback icon', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'some-check',
+          category: 'test-cat',
+          status: 'something-unknown' as 'pass', // cast lets an out-of-union status past the type checker
+          message: 'Weird result',
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report);
+    expect(output).toContain('Weird result'); // only the message is asserted, not the icon itself
+  });
+
+  it('shows spec link for error status', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'page-size-html',
+          category: 'page-size',
+          status: 'error',
+          message: 'Timeout',
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 0, skip: 0, error: 1 },
+    });
+    const output = formatText(report);
+    expect(output).toContain('Learn more:');
+    expect(output).toContain('#page-size-html'); // link anchor is expected to match the check id
+  });
+
+  it('does not show spec link for pass status', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'llms-txt-exists',
+          category: 'content-discoverability',
+          status: 'pass',
+          message: 'Found',
+        },
+      ],
+      summary: { total: 1, pass: 1, warn: 0, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report);
+    expect(output).not.toContain('Learn more:');
+  });
+
+  it('handles verbose with check having no detail formatter', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'llms-txt-exists',
+          category: 'content-discoverability',
+          status: 'pass',
+          message: 'Found',
+          details: { someField: 'value' }, // arbitrary details payload for this check id
+        },
+      ],
+      summary: { total: 1, pass: 1, warn: 0, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('Found');
   });
 });
 
diff --git a/test/unit/cli/scorecard-formatter.test.ts b/test/unit/cli/scorecard-formatter.test.ts
index e4b6235..daa1122 100644
--- a/test/unit/cli/scorecard-formatter.test.ts
+++ b/test/unit/cli/scorecard-formatter.test.ts
@@ -433,6 +433,97 @@ describe('formatScorecard', () => {
     expect(output).not.toContain('null / 100');
   });
 
+  it('shows tag scores with singular page count', () => { // pageCount of 1 should not be pluralised
+    const score = makeScoreResult({
+      tagScores: {
+        quickstart: {
+          score: 95,
+          grade: 'A',
+          pageCount: 1, // the value under test
+          checks: [],
+        },
+      },
+    });
+    const output = formatScorecard(makeReport(), score);
+    expect(output).toContain('1 page');
+    expect(output).not.toContain('1 pages'); // guards against the plural form
+  });
+
+  it('shows tag score check breakdown with only warns (no fails)', () => { // pages below: one pass, two warn, zero fail
+    const score = makeScoreResult({
+      tagScores: {
+        tutorials: {
+          score: 80,
+          grade: 'B',
+          pageCount: 3,
+          checks: [
+            {
+              checkId: 'page-size-html',
+              category: 'page-size',
+              weight: 7,
+              proportion: 0.8,
+              pages: [
+                { url: 'https://example.com/a', status: 'pass' },
+                { url: 'https://example.com/b', status: 'warn' },
+                { url: 'https://example.com/c', status: 'warn' },
+              ],
+            },
+          ],
+        },
+      },
+    });
+    const output = formatScorecard(makeReport(), score);
+    expect(output).toContain('2 warn');
+    expect(output).toContain('1 pass');
+    expect(output).not.toContain('fail'); // substring check over the whole scorecard, not just this tag
+    expect(output).toContain('WARN');
+  });
+
+  it('handles tag score page with skip status (not counted in pass/warn/fail)', () => { // pages below: one fail, one skip
+    const score = makeScoreResult({
+      tagScores: {
+        api: {
+          score: 50,
+          grade: 'D',
+          pageCount: 2,
+          checks: [
+            {
+              checkId: 'content-start-position',
+              category: 'page-size',
+              weight: 4,
+              proportion: 0.5,
+              pages: [
+                { url: 'https://example.com/a', status: 'fail' },
+                { url: 'https://example.com/b', status: 'skip' },
+              ],
+            },
+          ],
+        },
+      },
+    });
+    const output = formatScorecard(makeReport(), score);
+    expect(output).toContain('1 fail');
+    expect(output).not.toContain('skip'); // substring check over the whole scorecard, not just this tag
+  });
+
+  it('shows A+ grade with correct color', () => { // NOTE(review): only the "(A+)" text is asserted; colour is not checked here
+    const score = makeScoreResult({
+      overall: 100,
+      grade: 'A+',
+      categoryScores: {
+        'content-discoverability': { score: 100, grade: 'A+' },
+      },
+    });
+    const output = formatScorecard(makeReport(), score);
+    expect(output).toContain('(A+)');
+  });
+
+  it('handles invalid timestamp gracefully', () => { // an unparseable timestamp string is passed straight into the report
+    const report = makeReport({ timestamp: 'not-a-date' });
+    const output = formatScorecard(report, makeScoreResult()); // score result built with helper defaults
+    expect(output).toContain('not-a-date'); // the raw string is expected to appear unchanged
+  });
+
   it('renders N/A scorecard end-to-end from single-page discovery report', () => {
     const report = makeReport({
       results: [
diff --git a/test/unit/helpers/detect-tabs.test.ts b/test/unit/helpers/detect-tabs.test.ts
index cedef37..d237e54 100644
--- a/test/unit/helpers/detect-tabs.test.ts
+++ b/test/unit/helpers/detect-tabs.test.ts
@@ -624,6 +624,104 @@ Just some text, no Tab elements here.
     expect(groups).toHaveLength(0);
   });
 
+  it('MkDocs skips container with no labels and no panels', () => { // fixture: .tabbed-set whose label and content wrappers are both empty
+    const html = `
+      <div class="tabbed-set">
+        <div class="tabbed-labels"></div>
+        <div class="tabbed-content"></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(0); // nothing should be reported for the empty container
+  });
+
+  it('Sphinx skips container with no tabs and no panels', () => { // fixture: a childless .sphinx-tabs element
+    const html = `
+      <div class="sphinx-tabs"></div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(0); // nothing should be reported for the empty container
+  });
+
+  it('MkDocs handles more labels than panels (empty html fallback)', () => { // fixture: three labels but a single .tabbed-block
+    const html = `
+      <div class="tabbed-set">
+        <div class="tabbed-labels">
+          <label>Tab A</label>
+          <label>Tab B</label>
+          <label>Tab C</label>
+        </div>
+        <div class="tabbed-content">
+          <div class="tabbed-block"><pre>only panel</pre></div>
+        </div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('mkdocs');
+    expect(groups[0].tabCount).toBe(3); // count is expected to follow the labels, not the panels
+    expect(groups[0].panels[0].html).toContain('only panel');
+    expect(groups[0].panels[1].html).toBe(''); // labels without a matching panel get empty html
+    expect(groups[0].panels[2].html).toBe('');
+  });
+
+  it('MS Learn skips container already claimed by Docusaurus', () => {
+    // A .tabGroup inside a Docusaurus tab — Docusaurus runs first and claims
+    // the outer container. MS Learn should skip the inner .tabGroup.
+    const html = `
+      <div>
+        <ul role="tablist">
+          <li class="tabs__item" role="tab">Outer</li>
+        </ul>
+        <div role="tabpanel">
+          <div class="tabGroup">
+            <a role="tab" data-tab="inner">Inner</a>
+            <section role="tabpanel"><pre>inner content</pre></section>
+          </div>
+        </div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1); // a single group: the nested .tabGroup must not be reported separately
+    expect(groups[0].framework).toBe('docusaurus');
+  });
+
+  it('MS Learn handles more panels than tabs (null label fallback)', () => { // fixture: one tab link but three tabpanel sections
+    const html = `
+      <div class="tabGroup">
+        <a role="tab" data-tab="only">Only Tab</a>
+        <section role="tabpanel"><pre>panel 1</pre></section>
+        <section role="tabpanel"><pre>panel 2</pre></section>
+        <section role="tabpanel"><pre>panel 3</pre></section>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('microsoft-learn');
+    expect(groups[0].tabCount).toBe(3); // count is expected to follow the panels, not the tabs
+    expect(groups[0].panels[0].label).toBe('Only Tab');
+    expect(groups[0].panels[1].label).toBeNull(); // panels without a matching tab get a null label
+    expect(groups[0].panels[2].label).toBeNull();
+  });
+
+  it('generic ARIA with panels but no tab elements uses panel count', () => {
+    // Some implementations have tabpanels without explicit role="tab" buttons.
+    // The detector should use panels.length as the count and set null labels.
+    const html = `
+      <div>
+        <div role="tablist"></div>
+        <div role="tabpanel"><p>Content A</p></div>
+        <div role="tabpanel"><p>Content B</p></div>
+      </div>
+    `;
+    const groups = detectTabGroups(html);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].framework).toBe('generic-aria');
+    expect(groups[0].tabCount).toBe(2); // two tabpanel elements in the fixture
+    expect(groups[0].panels[0].label).toBeNull();
+    expect(groups[0].panels[1].label).toBeNull();
+  });
+
   it('Docusaurus detector uses ancestor walking when panels are not siblings', () => {
     // Docusaurus with a wrapper structure where tablist and panels
     // share a grandparent rather than a direct parent.
diff --git a/test/unit/helpers/get-markdown-content.test.ts b/test/unit/helpers/get-markdown-content.test.ts
new file mode 100644
index 0000000..924637d
--- /dev/null
+++ b/test/unit/helpers/get-markdown-content.test.ts
@@ -0,0 +1,379 @@
+import { describe, it, expect, beforeAll, afterEach } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+import { createContext } from '../../../src/runner.js';
+import { getMarkdownContent } from '../../../src/helpers/get-markdown-content.js';
+import type { DiscoveredFile } from '../../../src/types.js';
+import { mockSitemapNotFound } from '../../helpers/mock-sitemap-not-found.js';
+
+const server = setupServer(); // MSW server created with no initial handlers; tests register their own
+
+beforeAll(() => {
+  server.listen({ onUnhandledRequest: 'bypass' }); // NOTE(review): 'bypass' presumably lets unmatched requests through untouched — confirm against MSW docs
+  return () => server.close(); // NOTE(review): presumably run by vitest as the matching teardown — confirm
+});
+
+afterEach(() => server.resetHandlers()); // handler reset after every test
+
+describe('getMarkdownContent', () => {
+  describe('cached mode (dependency already ran)', () => { // every case seeds ctx.previousResults with a dependency result before calling getMarkdownContent
+    it('returns cached pages when markdown-url-support passed', async () => {
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      ctx.previousResults.set('markdown-url-support', {
+        id: 'markdown-url-support',
+        category: 'markdown-availability',
+        status: 'pass',
+        message: 'Markdown supported',
+      });
+      ctx.pageCache.set('http://test.local/docs/page1', {
+        url: 'http://test.local/docs/page1',
+        markdown: { content: '# Page 1\n\nContent.', source: 'md-url' },
+      });
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('cached');
+      if (result.mode === 'cached') { // narrows the result before reading depPassed
+        expect(result.depPassed).toBe(true);
+        expect(result.pages).toHaveLength(1);
+        expect(result.pages[0].content).toBe('# Page 1\n\nContent.');
+        expect(result.pages[0].source).toBe('md-url');
+      }
+    });
+
+    it('returns cached pages when content-negotiation passed', async () => {
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      ctx.previousResults.set('content-negotiation', {
+        id: 'content-negotiation',
+        category: 'markdown-availability',
+        status: 'pass',
+        message: 'Content negotiation supported',
+      });
+      ctx.pageCache.set('http://test.local/docs/page1', {
+        url: 'http://test.local/docs/page1',
+        markdown: { content: '# Page 1', source: 'content-negotiation' },
+      });
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('cached');
+      if (result.mode === 'cached') {
+        expect(result.depPassed).toBe(true);
+      }
+    });
+
+    it('sets depPassed false when dependency ran but failed', async () => {
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      ctx.previousResults.set('markdown-url-support', {
+        id: 'markdown-url-support',
+        category: 'markdown-availability',
+        status: 'fail', // the only dependency result recorded is a failure
+        message: 'Not supported',
+      });
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('cached');
+      if (result.mode === 'cached') {
+        expect(result.depPassed).toBe(false);
+        expect(result.pages).toHaveLength(0); // nothing was put in pageCache for this case
+      }
+    });
+
+    it('sets depPassed true when dependency warned', async () => {
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      ctx.previousResults.set('content-negotiation', {
+        id: 'content-negotiation',
+        category: 'markdown-availability',
+        status: 'warn', // a warn result is expected to count as passed
+        message: 'Partially supported',
+      });
+      ctx.pageCache.set('http://test.local/docs/page1', {
+        url: 'http://test.local/docs/page1',
+        markdown: { content: '# Page 1', source: 'content-negotiation' },
+      });
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('cached');
+      if (result.mode === 'cached') {
+        expect(result.depPassed).toBe(true);
+      }
+    });
+
+    it('skips cache entries without markdown content', async () => {
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      ctx.previousResults.set('markdown-url-support', {
+        id: 'markdown-url-support',
+        category: 'markdown-availability',
+        status: 'pass',
+        message: 'OK',
+      });
+      ctx.pageCache.set('http://test.local/docs/page1', {
+        url: 'http://test.local/docs/page1',
+        markdown: { content: '# Has content', source: 'md-url' },
+      });
+      ctx.pageCache.set('http://test.local/docs/page2', {
+        url: 'http://test.local/docs/page2',
+        // No markdown field
+      });
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('cached');
+      expect(result.pages.filter((p) => p.source !== 'llms-txt')).toHaveLength(1); // only page1 carries markdown, so one non-llms-txt page is expected
+    });
+  });
+
+  describe('llms.txt content collection', () => { // checks how discoveredFiles from the llms-txt-exists result show up in result.pages
+    it('includes llms.txt content from llms-txt-exists result', async () => {
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      const discovered: DiscoveredFile[] = [
+        {
+          url: 'http://test.local/llms.txt',
+          content: '# Docs\n\n- [Guide](/guide): A guide',
+          status: 200,
+          redirected: false,
+        },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered }, // discovered files travel in the details payload
+      });
+      ctx.previousResults.set('markdown-url-support', {
+        id: 'markdown-url-support',
+        category: 'markdown-availability',
+        status: 'pass',
+        message: 'OK',
+      });
+
+      const result = await getMarkdownContent(ctx);
+      const llmsPages = result.pages.filter((p) => p.source === 'llms-txt'); // keep only pages tagged as llms-txt
+      expect(llmsPages).toHaveLength(1);
+      expect(llmsPages[0].url).toBe('http://test.local/llms.txt');
+      expect(llmsPages[0].content).toContain('# Docs');
+    });
+
+    it('skips discovered files without content', async () => {
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      const discovered: DiscoveredFile[] = [
+        { url: 'http://test.local/llms.txt', status: 200, redirected: false },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered }, // the single discovered file has no content field
+      });
+      ctx.previousResults.set('markdown-url-support', {
+        id: 'markdown-url-support',
+        category: 'markdown-availability',
+        status: 'pass',
+        message: 'OK',
+      });
+
+      const result = await getMarkdownContent(ctx);
+      const llmsPages = result.pages.filter((p) => p.source === 'llms-txt');
+      expect(llmsPages).toHaveLength(0);
+    });
+
+    it('handles missing llms-txt-exists result', async () => { // no llms-txt-exists entry is ever set on ctx here
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      ctx.previousResults.set('markdown-url-support', {
+        id: 'markdown-url-support',
+        category: 'markdown-availability',
+        status: 'pass',
+        message: 'OK',
+      });
+
+      const result = await getMarkdownContent(ctx);
+      const llmsPages = result.pages.filter((p) => p.source === 'llms-txt');
+      expect(llmsPages).toHaveLength(0);
+    });
+  });
+
+  describe('standalone mode (no dependency ran)', () => {
+    it('fetches markdown via .md URL candidates', async () => {
+      const llmsTxt = '# Docs\n\n- [Page 1](http://test.local/docs/page1): Page';
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      const discovered: DiscoveredFile[] = [
+        { url: 'http://test.local/llms.txt', content: llmsTxt, status: 200, redirected: false },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered },
+      });
+      mockSitemapNotFound(server, 'http://test.local');
+
+      server.use(
+        http.get(
+          'http://test.local/docs/page1.md',
+          () =>
+            new HttpResponse('# Page 1\n\nMarkdown content here.', {
+              status: 200,
+              headers: { 'Content-Type': 'text/markdown' },
+            }),
+        ),
+      );
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('standalone');
+      const fetched = result.pages.filter((p) => p.source === 'standalone-md-url');
+      expect(fetched).toHaveLength(1);
+      expect(fetched[0].content).toContain('# Page 1');
+    });
+
+    it('falls back to content negotiation when .md URLs fail', async () => {
+      const llmsTxt = '# Docs\n\n- [Page 1](http://test.local/docs/page1): Page';
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      const discovered: DiscoveredFile[] = [
+        { url: 'http://test.local/llms.txt', content: llmsTxt, status: 200, redirected: false },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered },
+      });
+      mockSitemapNotFound(server, 'http://test.local');
+
+      server.use(
+        http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+        http.get(
+          'http://test.local/docs/page1/index.md',
+          () => new HttpResponse('', { status: 404 }),
+        ),
+        http.get('http://test.local/docs/page1', ({ request }) => {
+          const accept = request.headers.get('accept') ?? '';
+          if (accept.includes('text/markdown')) {
+            return new HttpResponse('# Page 1 via CN\n\nContent.', {
+              status: 200,
+              headers: { 'Content-Type': 'text/markdown' },
+            });
+          }
+          return new HttpResponse('<html><body>Page 1</body></html>', {
+            status: 200,
+            headers: { 'Content-Type': 'text/html' },
+          });
+        }),
+      );
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('standalone');
+      const fetched = result.pages.filter((p) => p.source === 'standalone-content-negotiation');
+      expect(fetched).toHaveLength(1);
+      expect(fetched[0].content).toContain('# Page 1 via CN');
+    });
+
+    it('returns empty when no markdown is available', async () => {
+      const llmsTxt = '# Docs\n\n- [Page 1](http://test.local/docs/page1): Page';
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      const discovered: DiscoveredFile[] = [
+        { url: 'http://test.local/llms.txt', content: llmsTxt, status: 200, redirected: false },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered },
+      });
+      mockSitemapNotFound(server, 'http://test.local');
+
+      server.use(
+        http.get('http://test.local/docs/page1.md', () => new HttpResponse('', { status: 404 })),
+        http.get(
+          'http://test.local/docs/page1/index.md',
+          () => new HttpResponse('', { status: 404 }),
+        ),
+        http.get(
+          'http://test.local/docs/page1',
+          () =>
+            new HttpResponse('<html><body>HTML only</body></html>', {
+              status: 200,
+              headers: { 'Content-Type': 'text/html' },
+            }),
+        ),
+      );
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('standalone');
+      const fetched = result.pages.filter((p) => p.source !== 'llms-txt');
+      expect(fetched).toHaveLength(0);
+    });
+
+    it('skips .md URLs that return non-markdown content', async () => {
+      const llmsTxt = '# Docs\n\n- [Page 1](http://test.local/docs/page1): Page';
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      const discovered: DiscoveredFile[] = [
+        { url: 'http://test.local/llms.txt', content: llmsTxt, status: 200, redirected: false },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered },
+      });
+      mockSitemapNotFound(server, 'http://test.local');
+
+      server.use(
+        http.get(
+          'http://test.local/docs/page1.md',
+          () =>
+            new HttpResponse('<html><body>Not markdown</body></html>', {
+              status: 200,
+              headers: { 'Content-Type': 'text/html' },
+            }),
+        ),
+        http.get(
+          'http://test.local/docs/page1/index.md',
+          () => new HttpResponse('', { status: 404 }),
+        ),
+        http.get(
+          'http://test.local/docs/page1',
+          () =>
+            new HttpResponse('<html><body>HTML</body></html>', {
+              status: 200,
+              headers: { 'Content-Type': 'text/html' },
+            }),
+        ),
+      );
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('standalone');
+      const fetched = result.pages.filter((p) => p.source !== 'llms-txt');
+      expect(fetched).toHaveLength(0);
+    });
+
+    it('handles fetch errors gracefully', async () => {
+      const llmsTxt = '# Docs\n\n- [Page 1](http://test.local/docs/page1): Page';
+      const ctx = createContext('http://test.local', { requestDelay: 0 });
+      const discovered: DiscoveredFile[] = [
+        { url: 'http://test.local/llms.txt', content: llmsTxt, status: 200, redirected: false },
+      ];
+      ctx.previousResults.set('llms-txt-exists', {
+        id: 'llms-txt-exists',
+        category: 'content-discoverability',
+        status: 'pass',
+        message: 'Found',
+        details: { discoveredFiles: discovered },
+      });
+      mockSitemapNotFound(server, 'http://test.local');
+
+      server.use(
+        http.get('http://test.local/docs/page1.md', () => HttpResponse.error()),
+        http.get('http://test.local/docs/page1/index.md', () => HttpResponse.error()),
+        http.get('http://test.local/docs/page1', () => HttpResponse.error()),
+      );
+
+      const result = await getMarkdownContent(ctx);
+      expect(result.mode).toBe('standalone');
+      const fetched = result.pages.filter((p) => p.source !== 'llms-txt');
+      expect(fetched).toHaveLength(0);
+    });
+  });
+});
diff --git a/test/unit/helpers/http.test.ts b/test/unit/helpers/http.test.ts
index 3056564..bdd2192 100644
--- a/test/unit/helpers/http.test.ts
+++ b/test/unit/helpers/http.test.ts
@@ -229,4 +229,82 @@ describe('createHttpClient', () => {
       expect(text).toBe(original);
     });
   });
+
+  describe('concurrency and rate limiting', () => {
+    it('enforces requestDelay between requests', async () => {
+      const timestamps: number[] = [];
+      globalThis.fetch = vi.fn(async () => {
+        timestamps.push(Date.now());
+        return makeResponse(200);
+      });
+
+      const client = createHttpClient({
+        requestDelay: 200,
+        requestTimeout: 5000,
+        maxConcurrency: 10,
+      });
+
+      const p1 = client.fetch('http://example.com/a');
+      await vi.advanceTimersByTimeAsync(0);
+      await p1;
+
+      const p2 = client.fetch('http://example.com/b');
+      await vi.advanceTimersByTimeAsync(250);
+      await p2;
+
+      expect(timestamps).toHaveLength(2);
+      expect(timestamps[1] - timestamps[0]).toBeGreaterThanOrEqual(200);
+    });
+
+    it('waits for a concurrency slot when at max', async () => {
+      let resolveFetch: (() => void) | null = null;
+      let callCount = 0;
+
+      globalThis.fetch = vi.fn(
+        () =>
+          new Promise<Response>((resolve) => {
+            callCount++;
+            if (callCount === 1) {
+              resolveFetch = () =>
+                resolve({
+                  ok: true,
+                  status: 200,
+                  headers: new Headers(),
+                  text: async () => '',
+                } as unknown as Response);
+            } else {
+              resolve({
+                ok: true,
+                status: 200,
+                headers: new Headers(),
+                text: async () => '',
+              } as unknown as Response);
+            }
+          }),
+      );
+
+      const client = createHttpClient({
+        requestDelay: 0,
+        requestTimeout: 5000,
+        maxConcurrency: 1,
+      });
+
+      const p1 = client.fetch('http://example.com/a');
+      await vi.advanceTimersByTimeAsync(0);
+
+      const p2 = client.fetch('http://example.com/b');
+      // Second request should be waiting for the slot
+      await vi.advanceTimersByTimeAsync(50);
+      expect(callCount).toBe(1);
+
+      // Release the first request
+      resolveFetch!();
+      await p1;
+
+      // Now the second request can proceed
+      await vi.advanceTimersByTimeAsync(100);
+      await p2;
+      expect(callCount).toBe(2);
+    });
+  });
 });
diff --git a/test/unit/scoring/coefficients.test.ts b/test/unit/scoring/coefficients.test.ts
index 61dd40b..e890f52 100644
--- a/test/unit/scoring/coefficients.test.ts
+++ b/test/unit/scoring/coefficients.test.ts
@@ -188,6 +188,60 @@ describe('coefficients', () => {
     });
   });
 
+  describe('HTML path coefficient edge cases', () => {
+    it('returns 1.0 when rendering-strategy is skip', () => {
+      const results = resultsMap(r('rendering-strategy', 'skip'));
+      expect(getCoefficient('page-size-html', results)).toBe(1.0);
+    });
+
+    it('returns 1.0 when rendering-strategy is error', () => {
+      const results = resultsMap(r('rendering-strategy', 'error'));
+      expect(getCoefficient('page-size-html', results)).toBe(1.0);
+    });
+
+    it('returns 1.0 when rendering-strategy has no details', () => {
+      const results = resultsMap(r('rendering-strategy', 'warn'));
+      expect(getCoefficient('page-size-html', results)).toBe(1.0);
+    });
+
+    it('returns 1.0 when rendering-strategy total is zero', () => {
+      const results = resultsMap(
+        r('rendering-strategy', 'warn', {
+          serverRendered: 0,
+          sparseContent: 0,
+          spaShells: 0,
+        }),
+      );
+      expect(getCoefficient('page-size-html', results)).toBe(1.0);
+    });
+  });
+
+  describe('index truncation coefficient edge cases', () => {
+    it('falls back to 0.5 when sizes have zero characters', () => {
+      const results = resultsMap(
+        r('llms-txt-size', 'fail', {
+          sizes: [{ characters: 0 }],
+        }),
+      );
+      expect(getCoefficient('llms-txt-valid', results)).toBe(0.5);
+    });
+
+    it('falls back to 0.5 when fail has no details at all', () => {
+      const results = resultsMap(r('llms-txt-size', 'fail'));
+      expect(getCoefficient('llms-txt-valid', results)).toBe(0.5);
+    });
+
+    it('returns 1.0 for non-standard status (error)', () => {
+      const results = resultsMap(r('llms-txt-size', 'error'));
+      expect(getCoefficient('llms-txt-valid', results)).toBe(1.0);
+    });
+
+    it('returns 1.0 for skip status', () => {
+      const results = resultsMap(r('llms-txt-size', 'skip'));
+      expect(getCoefficient('llms-txt-valid', results)).toBe(1.0);
+    });
+  });
+
   describe('non-coefficient checks', () => {
     it('returns 1.0 for checks without coefficients', () => {
       const results = resultsMap();
diff --git a/test/unit/scoring/diagnostics.test.ts b/test/unit/scoring/diagnostics.test.ts
index 4a6a7d7..0152207 100644
--- a/test/unit/scoring/diagnostics.test.ts
+++ b/test/unit/scoring/diagnostics.test.ts
@@ -503,5 +503,123 @@ describe('diagnostics', () => {
       const diags = evaluateDiagnostics(results, defaultReport());
       expect(diags.find((d) => d.id === 'rate-limiting-severe')).toBeUndefined();
     });
+
+    it('uses pageResults length when testedLinks is absent', () => {
+      const results = resultsMap(
+        r('markdown-url-support', 'warn', {
+          pageResults: Array.from({ length: 10 }, () => ({})),
+          rateLimited: 5,
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'rate-limiting-severe');
+      expect(diag).toBeDefined();
+      expect(diag!.message).toContain('50%');
+    });
+  });
+
+  describe('gzipped-sitemap-skipped message', () => {
+    it('includes URL from warning when regex matches', () => {
+      const results = resultsMap(
+        r('page-size-html', 'pass', {
+          discoveryWarnings: [
+            'Skipped gzipped sitemap (not supported): https://example.com/sitemap.xml.gz',
+          ],
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'gzipped-sitemap-skipped');
+      expect(diag!.message).toContain('https://example.com/sitemap.xml.gz');
+    });
+
+    it('handles warning without URL pattern', () => {
+      const results = resultsMap(
+        r('page-size-html', 'pass', {
+          discoveryWarnings: ['Skipped gzipped sitemap (not supported)'],
+        }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'gzipped-sitemap-skipped');
+      expect(diag).toBeDefined();
+      expect(diag!.message).toContain('gzipped sitemap was skipped');
+    });
+  });
+
+  describe('spa-shell-html-invalid message', () => {
+    it('notes no alternative path when markdown-url-support fails', () => {
+      const results = resultsMap(
+        r('rendering-strategy', 'fail', {
+          serverRendered: 3,
+          sparseContent: 5,
+          spaShells: 5,
+        }),
+        r('markdown-url-support', 'fail'),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'spa-shell-html-invalid');
+      expect(diag).toBeDefined();
+      expect(diag!.message).toContain('no alternative path');
+    });
+  });
+
+  describe('no-viable-path message', () => {
+    it('describes missing llms.txt when status is fail', () => {
+      const results = resultsMap(
+        r('llms-txt-exists', 'fail'),
+        r('rendering-strategy', 'fail'),
+        r('markdown-url-support', 'fail'),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'no-viable-path');
+      expect(diag!.message).toContain('no llms.txt');
+    });
+
+    it('describes broken llms.txt links when resolve rate is low', () => {
+      const results = resultsMap(
+        r('llms-txt-exists', 'pass'),
+        r('llms-txt-links-resolve', 'fail', { resolveRate: 5 }),
+        r('rendering-strategy', 'fail'),
+        r('markdown-url-support', 'fail'),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'no-viable-path');
+      expect(diag!.message).toContain('5% of links resolve');
+    });
+  });
+
+  describe('truncated-index message', () => {
+    it('computes visible percentage from file size', () => {
+      const results = resultsMap(
+        r('llms-txt-exists', 'pass'),
+        r('llms-txt-size', 'fail', { sizes: [{ characters: 500_000 }] }),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'truncated-index');
+      expect(diag!.message).toContain('500,000 characters');
+      expect(diag!.message).toContain('20%');
+    });
+  });
+
+  describe('page-size-no-markdown-escape message', () => {
+    it('includes fail bucket count in message', () => {
+      const results = resultsMap(
+        r('page-size-html', 'fail', { failBucket: 7 }),
+        r('markdown-url-support', 'fail'),
+      );
+      const diags = evaluateDiagnostics(results, defaultReport());
+      const diag = diags.find((d) => d.id === 'page-size-no-markdown-escape');
+      expect(diag!.message).toContain('7 pages');
+    });
+  });
+
+  describe('single-page-sample message', () => {
+    it('uses plural form for multiple pages', () => {
+      const report = defaultReport();
+      report.testedPages = 3;
+      const results = resultsMap();
+      const diags = evaluateDiagnostics(results, report);
+      const diag = diags.find((d) => d.id === 'single-page-sample');
+      expect(diag!.message).toContain('3 pages were');
+    });
   });
 });
diff --git a/test/unit/scoring/proportions.test.ts b/test/unit/scoring/proportions.test.ts
index 4713447..1a56c1e 100644
--- a/test/unit/scoring/proportions.test.ts
+++ b/test/unit/scoring/proportions.test.ts
@@ -364,6 +364,187 @@ describe('proportions', () => {
     });
   });
 
+  describe('edge cases with empty or malformed details', () => {
+    it('bucket check: falls back to status when all buckets are zero', () => {
+      const result = getCheckProportion(
+        makeResult('page-size-html', 'warn', {
+          passBucket: 0,
+          warnBucket: 0,
+          failBucket: 0,
+        }),
+        makeWeight(7, 0.5),
+      );
+      // Nothing was bucketed, so the status-based proportion is expected (0.5 for this warn)
+      expect(result!.proportion).toBe(0.5);
+    });
+
+    it('pageResults: falls back to status when array is empty', () => {
+      const result = getCheckProportion(
+        makeResult('markdown-code-fence-validity', 'pass', {
+          pageResults: [],
+        }),
+        makeWeight(4),
+      );
+      // No per-page results to count, so the status-based proportion is expected (1.0 for pass)
+      expect(result!.proportion).toBe(1.0);
+    });
+
+    it('rendering-strategy: falls back to status when all counts are zero', () => {
+      const result = getCheckProportion(
+        makeResult('rendering-strategy', 'warn', {
+          serverRendered: 0,
+          sparseContent: 0,
+          spaShells: 0,
+        }),
+        makeWeight(10, 0.5),
+      );
+      // No pages were classified, so the status-based proportion is expected (0.5 for this warn)
+      expect(result!.proportion).toBe(0.5);
+    });
+
+    it('tabbed-content: falls back to status when tabbedPages is empty', () => {
+      const result = getCheckProportion(
+        makeResult('tabbed-content-serialization', 'warn', {
+          tabbedPages: [],
+        }),
+        makeWeight(4, 0.5),
+      );
+      // No tabbed pages to count, so the status-based proportion is expected (0.5 for this warn)
+      expect(result!.proportion).toBe(0.5);
+    });
+
+    it('section-header: falls back to status when analyses is empty', () => {
+      const result = getCheckProportion(
+        makeResult('section-header-quality', 'warn', {
+          analyses: [],
+        }),
+        makeWeight(2, 0.5),
+      );
+      // No analyses to count, so the status-based proportion is expected (0.5 for this warn)
+      expect(result!.proportion).toBe(0.5);
+    });
+
+    it('http-status-codes: falls back to status when pageResults is empty', () => {
+      const result = getCheckProportion(
+        makeResult('http-status-codes', 'pass', {
+          pageResults: [],
+        }),
+        makeWeight(7),
+      );
+      // No per-page results to count, so the status-based proportion is expected (1.0 for pass)
+      expect(result!.proportion).toBe(1.0);
+    });
+
+    it('content-negotiation: excludes skipped and handles unknown classification', () => {
+      const result = getCheckProportion(
+        makeResult('content-negotiation', 'warn', {
+          pageResults: [
+            { url: '/a', classification: 'unknown-type', status: 200 },
+            { url: '/b', classification: 'markdown-with-correct-type', status: 200 },
+            { url: '/c', skipped: true, status: 0 },
+          ],
+        }),
+        makeWeight(4, 0.75),
+      );
+      // unknown → skip (excluded), markdown-with-correct-type → pass
+      // Only 1 item counted (pass), skip items excluded from proportion
+      expect(result!.proportion).toBe(1.0);
+      expect(result!.tested).toBe(1);
+    });
+
+    it('redirect-behavior: handles unknown classification as skip', () => {
+      const result = getCheckProportion(
+        makeResult('redirect-behavior', 'pass', {
+          pageResults: [
+            { url: '/a', classification: 'no-redirect' },
+            { url: '/b', classification: 'unknown-type' },
+          ],
+        }),
+        makeWeight(4, 0.6),
+      );
+      // no-redirect → pass, unknown → skip
+      expect(result!.proportion).toBe(1.0);
+      expect(result!.tested).toBe(1);
+    });
+
+    it('auth-gate: handles auth-redirect classification', () => {
+      const result = getCheckProportion(
+        makeResult('auth-gate-detection', 'fail', {
+          pageResults: [{ url: '/a', classification: 'auth-redirect' }],
+        }),
+        makeWeight(10, 0.5),
+      );
+      expect(result!.proportion).toBe(0.0);
+    });
+
+    it('llms-txt-size: uses default thresholds when not provided', () => {
+      const result = getCheckProportion(
+        makeResult('llms-txt-size', 'warn', {
+          sizes: [{ characters: 30_000 }, { characters: 70_000 }],
+        }),
+        makeWeight(7, 0.5),
+      );
+      // Default thresholds: pass=50K, fail=100K
+      // 30K → pass, 70K → warn
+      expect(result!.proportion).toBe(0.75);
+    });
+
+    it('llms-txt-size: fail file exceeding fail threshold', () => {
+      const result = getCheckProportion(
+        makeResult('llms-txt-size', 'fail', {
+          sizes: [{ characters: 150_000 }],
+          thresholds: { pass: 50_000, fail: 100_000 },
+        }),
+        makeWeight(7, 0.5),
+      );
+      expect(result!.proportion).toBe(0.0);
+    });
+
+    it('llms-txt-size: falls back to status when sizes is empty', () => {
+      const result = getCheckProportion(
+        makeResult('llms-txt-size', 'warn', { sizes: [] }),
+        makeWeight(7, 0.5),
+      );
+      // No file sizes to grade, so the status-based proportion is expected (0.5 for this warn)
+      expect(result!.proportion).toBe(0.5);
+    });
+
+    it('markdown-url-support: all pages skipped yields zero proportion', () => {
+      const result = getCheckProportion(
+        makeResult('markdown-url-support', 'pass', {
+          pageResults: [
+            { url: '/a', skipped: true },
+            { url: '/b', skipped: true },
+          ],
+        }),
+        makeWeight(7, 0.5),
+      );
+      // All pages filtered out → countByStatus returns 0/0
+      expect(result!.proportion).toBe(0);
+      expect(result!.tested).toBe(0);
+    });
+
+    it('llms-txt-directive: all pages have errors', () => {
+      const result = getCheckProportion(
+        makeResult('llms-txt-directive-html', 'fail', {
+          pageResults: [
+            { url: '/a', error: 'fetch failed' },
+            { url: '/b', error: 'timeout' },
+          ],
+        }),
+        makeWeight(7, 0.6),
+      );
+      // All filtered out, countByStatus returns 0/0
+      expect(result!.proportion).toBe(0);
+      expect(result!.tested).toBe(0);
+    });
+
+    it('uses default warn coefficient of 0.5 when not provided', () => {
+      const result = getCheckProportion(makeResult('llms-txt-exists', 'warn'), makeWeight(10));
+      expect(result).toEqual({ proportion: 0.5, tested: 1 });
+    });
+  });
+
   describe('missing details fallbacks', () => {
     it('llms-txt-valid: falls back to status when no details', () => {
       const result = getCheckProportion(makeResult('llms-txt-valid', 'warn'), makeWeight(4, 0.75));
diff --git a/test/unit/scoring/resolutions.test.ts b/test/unit/scoring/resolutions.test.ts
index 30befba..71e2daa 100644
--- a/test/unit/scoring/resolutions.test.ts
+++ b/test/unit/scoring/resolutions.test.ts
@@ -63,6 +63,168 @@ describe('resolutions', () => {
     expect(text).toContain('8 of 50');
   });
 
+  it('returns "unknown" when sizes array is empty', () => {
+    const text = getResolution(r('llms-txt-size', 'warn', { sizes: [] }));
+    expect(text).toContain('unknown');
+  });
+
+  it('returns "unknown" when sizes field is missing', () => {
+    const text = getResolution(r('llms-txt-size', 'fail', {}));
+    expect(text).toContain('unknown');
+  });
+
+  it('returns 0 broken links when broken array is missing', () => {
+    const text = getResolution(r('llms-txt-links-resolve', 'fail', { testedLinks: 10 }));
+    expect(text).toContain('0 of 10');
+  });
+
+  it('returns 0 count when pageResults missing for markdown-url-support', () => {
+    const text = getResolution(r('markdown-url-support', 'warn', {}));
+    expect(text).toContain('0 of 0');
+  });
+
+  it('returns undefined for check with only a fail template when status is warn', () => {
+    const text = getResolution(r('markdown-code-fence-validity', 'warn'));
+    expect(text).toBeUndefined();
+  });
+
+  it('returns undefined for check with only a fail template (http-status-codes)', () => {
+    const text = getResolution(r('http-status-codes', 'warn'));
+    expect(text).toBeUndefined();
+  });
+
+  it('interpolates tabbed page counts', () => {
+    const text = getResolution(
+      r('tabbed-content-serialization', 'warn', {
+        tabbedPages: [{ status: 'warn' }, { status: 'warn' }, { status: 'pass' }],
+      }),
+    );
+    expect(text).toContain('2 pages');
+  });
+
+  it('handles empty tabbedPages array', () => {
+    const text = getResolution(
+      r('tabbed-content-serialization', 'fail', {
+        tabbedPages: [],
+      }),
+    );
+    expect(text).toContain('0 pages');
+  });
+
+  it('interpolates coverage details', () => {
+    const text = getResolution(
+      r('llms-txt-coverage', 'warn', {
+        missingCount: 12,
+        coverageRate: 85,
+        coverageWarnThreshold: 80,
+        coveragePassThreshold: 95,
+      }),
+    );
+    expect(text).toContain('85%');
+    expect(text).toContain('12 live');
+    expect(text).toContain('80-95%');
+  });
+
+  it('interpolates coverage fail details', () => {
+    const text = getResolution(
+      r('llms-txt-coverage', 'fail', {
+        missingCount: 50,
+        coverageRate: 60,
+        coverageWarnThreshold: 80,
+      }),
+    );
+    expect(text).toContain('60%');
+    expect(text).toContain('below 80%');
+    expect(text).toContain('50 live');
+  });
+
+  it('interpolates parity fail details with rounding', () => {
+    const text = getResolution(
+      r('markdown-content-parity', 'fail', {
+        failBucket: 3,
+        avgMissingPercent: 27.8,
+      }),
+    );
+    expect(text).toContain('3 pages');
+    expect(text).toContain('avg 28% missing');
+  });
+
+  it('uses default thresholds when details are empty for llms-txt-coverage warn', () => {
+    const text = getResolution(r('llms-txt-coverage', 'warn', {}));
+    expect(text).toContain('0%');
+    expect(text).toContain('80-95%');
+  });
+
+  it('uses default threshold for llms-txt-coverage fail', () => {
+    const text = getResolution(r('llms-txt-coverage', 'fail', {}));
+    expect(text).toContain('below 80%');
+  });
+
+  it('returns 0 for markdown-content-parity warn with empty details', () => {
+    const text = getResolution(r('markdown-content-parity', 'warn', {}));
+    expect(text).toContain('0 pages');
+  });
+
+  it('returns 0 for cache-header-hygiene warn with empty details', () => {
+    const text = getResolution(r('cache-header-hygiene', 'warn', {}));
+    expect(text).toContain('0 endpoints');
+  });
+
+  it('returns 0 for cache-header-hygiene fail with empty details', () => {
+    const text = getResolution(r('cache-header-hygiene', 'fail', {}));
+    expect(text).toContain('0 endpoints');
+  });
+
+  it('returns 0 for rendering-strategy warn with empty details', () => {
+    const text = getResolution(r('rendering-strategy', 'warn', {}));
+    expect(text).toContain('0 of 0');
+  });
+
+  it('returns 0 for rendering-strategy fail with empty details', () => {
+    const text = getResolution(r('rendering-strategy', 'fail', {}));
+    expect(text).toContain('0 of 0');
+  });
+
+  it('returns 0 for page-size-markdown warn with empty details', () => {
+    const text = getResolution(r('page-size-markdown', 'warn', {}));
+    expect(text).toContain('0 of 0');
+  });
+
+  it('returns 0 for page-size-markdown fail with empty details', () => {
+    const text = getResolution(r('page-size-markdown', 'fail', {}));
+    expect(text).toContain('0 of 0');
+  });
+
+  it('returns 0 for content-start-position warn with empty details', () => {
+    const text = getResolution(r('content-start-position', 'warn', {}));
+    expect(text).toContain('0 of 0');
+  });
+
+  it('returns 0 for content-start-position fail with empty details', () => {
+    const text = getResolution(r('content-start-position', 'fail', {}));
+    expect(text).toContain('0 of 0');
+  });
+
+  it('returns 0 for redirect-behavior warn with empty details', () => {
+    const text = getResolution(r('redirect-behavior', 'warn', {}));
+    expect(text).toContain('0 pages');
+  });
+
+  it('returns 0 for redirect-behavior fail with empty details', () => {
+    const text = getResolution(r('redirect-behavior', 'fail', {}));
+    expect(text).toContain('0 pages');
+  });
+
+  it('returns 0 for markdown-url-support warn with empty pageResults', () => {
+    const text = getResolution(r('markdown-url-support', 'warn', { pageResults: [] }));
+    expect(text).toContain('0 of 0'); // an explicit empty list should read like a missing one
+  });
+
+  it('returns 0 for llms-txt-links-resolve fail with empty details', () => {
+    const text = getResolution(r('llms-txt-links-resolve', 'fail', {}));
+    expect(text).toContain('0 of 0');
+  });
+
   it('provides resolution for every check with warn/fail', () => {
     const checkIds = [
       'llms-txt-exists',

From 74401d35b1490ea3cd8d2c6e518ebc42d82b95c3 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 26 Apr 2026 16:32:53 -0400
Subject: [PATCH 09/13] Add shared validation module for CLI and lib consumers

---
 docs/reference/programmatic-api.md            |  42 ++-
 src/checks/observability/llms-txt-coverage.ts |  18 +-
 .../observability/markdown-content-parity.ts  |  12 -
 src/cli/commands/check.ts                     |  77 ++--
 src/constants.ts                              |   9 +-
 src/helpers/config.ts                         |  88 +++--
 src/index.ts                                  |   9 +-
 src/runner.ts                                 |  15 +
 src/validation.ts                             | 219 +++++++++++
 test/unit/checks/llms-txt-coverage.test.ts    |  30 --
 .../checks/markdown-content-parity.test.ts    |  26 --
 test/unit/cli/check-command.test.ts           | 228 +++++++++++-
 test/unit/helpers/config.test.ts              | 164 ++++++++
 test/unit/helpers/get-page-urls.test.ts       |  35 +-
 test/unit/runner.test.ts                      |  32 ++
 test/unit/validation.test.ts                  | 349 ++++++++++++++++++
 16 files changed, 1186 insertions(+), 167 deletions(-)
 create mode 100644 src/validation.ts
 create mode 100644 test/unit/validation.test.ts

diff --git a/docs/reference/programmatic-api.md b/docs/reference/programmatic-api.md
index 809dabe..93c01fd 100644
--- a/docs/reference/programmatic-api.md
+++ b/docs/reference/programmatic-api.md
@@ -17,7 +17,7 @@ for (const result of report.results) {
 }
 ```
 
-`runChecks` returns a `ReportResult` containing:
+`runChecks` throws an `Error` if the options are invalid (see [Validate options](#validate-options) below). On success, it returns a `ReportResult` containing:
 
 - `url` — the URL that was checked
 - `timestamp` — when the check ran
@@ -78,7 +78,7 @@ console.log(result.status); // 'pass', 'warn', 'fail', or 'skip'
 console.log(result.message);
 ```
 
-`createContext` sets up the shared state (HTTP client, page cache, previous results) that checks use. If you run multiple checks against the same context, later checks can access the results of earlier ones through `ctx.previousResults`, which is how check dependencies work.
+`createContext` validates the options and throws an `Error` if any are invalid, then sets up the shared state (HTTP client, page cache, previous results) that checks use. If you run multiple checks against the same context, later checks can access the results of earlier ones through `ctx.previousResults`, which is how check dependencies work.
 
 ## List available checks
 
@@ -95,6 +95,42 @@ for (const check of sorted) {
 }
 ```
 
+## Validate options
+
+Both `runChecks` and `createContext` validate options and throw on invalid input. If you want to check options before calling either function (for example, to show validation errors in a UI or dry-run mode), use `validateRunnerOptions`:
+
+```ts
+import { validateRunnerOptions } from 'afdocs';
+
+const validation = validateRunnerOptions({
+  maxConcurrency: -1,
+  samplingStrategy: 'curated',
+});
+
+if (!validation.valid) {
+  for (const error of validation.errors) {
+    console.error(`${error.field}: ${error.message}`);
+    // maxConcurrency: maxConcurrency must be between 1 and 100, got -1
+    // samplingStrategy: Curated sampling requires curatedPages to be non-empty
+  }
+}
+```
+
+`validateRunnerOptions` returns a `ValidationResult` with `errors` and `warnings` arrays. Each entry has a `field` (the option name) and a `message` (human-readable description). The `valid` boolean is `true` when there are no errors.
+
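+Validations performed include numeric range checking, threshold ordering (e.g. size pass threshold must be less than or equal to fail threshold), sampling strategy enum validation, the requirement that curated sampling has a non-empty `curatedPages`, URL validity for `canonicalOrigin` and `llmsTxtUrl`, and check ID existence.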
+
+## Sampling strategies
+
+The valid sampling strategies are available as a constant:
+
+```ts
+import { VALID_SAMPLING_STRATEGIES } from 'afdocs';
+
+console.log(VALID_SAMPLING_STRATEGIES);
+// ['random', 'deterministic', 'curated', 'none']
+```
+
 ## Types
 
 The main types you'll work with:
@@ -110,6 +146,8 @@ import type {
   AgentDocsConfig,
   CuratedPageEntry,
   PageConfigEntry,
+  ValidationResult,
+  ValidationIssue,
 } from 'afdocs';
 ```
 
diff --git a/src/checks/observability/llms-txt-coverage.ts b/src/checks/observability/llms-txt-coverage.ts
index 1ddf47b..2c6704d 100644
--- a/src/checks/observability/llms-txt-coverage.ts
+++ b/src/checks/observability/llms-txt-coverage.ts
@@ -315,19 +315,10 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
   const id = 'llms-txt-coverage';
   const category = 'observability';
 
-  // Resolve thresholds: CLI/config overrides → defaults, clamped to [0, 100]
-  const clamp = (v: number) => Math.max(0, Math.min(100, v));
-  const rawPass = ctx.options.coveragePassThreshold ?? DEFAULT_COVERAGE_PASS_THRESHOLD;
-  const rawWarn = ctx.options.coverageWarnThreshold ?? DEFAULT_COVERAGE_WARN_THRESHOLD;
-  const passThreshold = clamp(rawPass) / 100;
-  const warnThreshold = clamp(rawWarn) / 100;
-  const thresholdWarnings: string[] = [];
-  if (passThreshold < warnThreshold) {
-    thresholdWarnings.push(
-      `coveragePassThreshold (${clamp(rawPass)}) is lower than ` +
-        `coverageWarnThreshold (${clamp(rawWarn)}); warn state is unreachable`,
-    );
-  }
+  const passThreshold =
+    (ctx.options.coveragePassThreshold ?? DEFAULT_COVERAGE_PASS_THRESHOLD) / 100;
+  const warnThreshold =
+    (ctx.options.coverageWarnThreshold ?? DEFAULT_COVERAGE_WARN_THRESHOLD) / 100;
 
   // Compile user-supplied exclusion patterns
   const userExclusionMatcher = compileExclusionMatcher(ctx.options.coverageExclusions ?? []);
@@ -561,7 +552,6 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
       unmatchedCount: unmatchedLlmsTxtUrls.length,
       unmatchedPct,
       sitemapWarnings,
-      ...(thresholdWarnings.length > 0 ? { thresholdWarnings } : {}),
     },
   };
 }
diff --git a/src/checks/observability/markdown-content-parity.ts b/src/checks/observability/markdown-content-parity.ts
index d97aaea..695e2b4 100644
--- a/src/checks/observability/markdown-content-parity.ts
+++ b/src/checks/observability/markdown-content-parity.ts
@@ -651,18 +651,6 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
   const failThreshold = ctx.options.parityWarnThreshold ?? DEFAULT_PARITY_WARN_THRESHOLD;
   const parityExclusions = ctx.options.parityExclusions;
 
-  if (warnThreshold > failThreshold && failThreshold > 0) {
-    return {
-      id,
-      category,
-      status: 'error',
-      message:
-        `parityPassThreshold (${warnThreshold}) is greater than ` +
-        `parityWarnThreshold (${failThreshold}). The pass threshold must be ` +
-        'less than or equal to the warn threshold.',
-    };
-  }
-
   const results: PageParityResult[] = [];
   const concurrency = ctx.options.maxConcurrency;
   let totalSegmentationStripped = 0;
diff --git a/src/cli/commands/check.ts b/src/cli/commands/check.ts
index c43d832..c5a8ce5 100644
--- a/src/cli/commands/check.ts
+++ b/src/cli/commands/check.ts
@@ -3,13 +3,13 @@ import { normalizeUrl, runChecks } from '../../runner.js';
 import { formatText } from '../formatters/text.js';
 import { formatJson } from '../formatters/json.js';
 import { formatScorecard } from '../formatters/scorecard.js';
-import type { PageConfigEntry, SamplingStrategy } from '../../types.js';
+import type { PageConfigEntry, RunnerOptions, SamplingStrategy } from '../../types.js';
 import { findConfig, validatePages } from '../../helpers/config.js';
+import { validateRunnerOptions } from '../../validation.js';
 
 // Ensure all checks are registered
 import '../../checks/index.js';
 
-const SAMPLING_STRATEGIES = ['random', 'deterministic', 'curated', 'none'] as const;
 const FORMAT_OPTIONS = ['text', 'json', 'scorecard'] as const;
 
 export function registerCheckCommand(program: Command): void {
@@ -152,21 +152,6 @@ export function registerCheckCommand(program: Command): void {
       }
 
       const sampling = samplingRaw as SamplingStrategy;
-      if (!SAMPLING_STRATEGIES.includes(sampling)) {
-        process.stderr.write(
-          `Error: Invalid sampling strategy "${sampling}". Must be one of: ${SAMPLING_STRATEGIES.join(', ')}\n`,
-        );
-        process.exitCode = 1;
-        return;
-      }
-
-      if (sampling === 'curated' && (!curatedPages || curatedPages.length === 0)) {
-        process.stderr.write(
-          'Error: Curated sampling requires pages. Use --urls or define "pages" in your config file.\n',
-        );
-        process.exitCode = 1;
-        return;
-      }
 
       const maxConcurrency = parseInt(
         String((opts.maxConcurrency as string | undefined) ?? config?.options?.maxConcurrency ?? 3),
@@ -211,37 +196,35 @@ export function registerCheckCommand(program: Command): void {
       const rawCanonical =
         (opts.canonicalOrigin as string | undefined) ?? config?.options?.canonicalOrigin;
       if (rawCanonical) {
+        const normalized = normalizeUrl(rawCanonical);
         try {
-          canonicalOrigin = new URL(normalizeUrl(rawCanonical)).origin;
+          canonicalOrigin = new URL(normalized).origin;
+          const targetOrigin = new URL(url).origin;
+          if (canonicalOrigin === targetOrigin) {
+            process.stderr.write(
+              `Warning: --canonical-origin "${canonicalOrigin}" is the same as the target origin. The flag has no effect.\n`,
+            );
+            canonicalOrigin = undefined;
+          }
         } catch {
-          process.stderr.write(`Error: Invalid --canonical-origin URL "${rawCanonical}".\n`);
-          process.exitCode = 1;
-          return;
-        }
-        const targetOrigin = new URL(url).origin;
-        if (canonicalOrigin === targetOrigin) {
-          process.stderr.write(
-            `Warning: --canonical-origin "${canonicalOrigin}" is the same as the target origin. The flag has no effect.\n`,
-          );
-          canonicalOrigin = undefined;
+          canonicalOrigin = normalized;
         }
       }
 
       let llmsTxtUrl: string | undefined;
       const rawLlmsTxtUrl = (opts.llmsTxtUrl as string | undefined) ?? config?.options?.llmsTxtUrl;
       if (rawLlmsTxtUrl) {
+        const normalized = normalizeUrl(rawLlmsTxtUrl);
         try {
-          llmsTxtUrl = new URL(normalizeUrl(rawLlmsTxtUrl)).toString();
+          llmsTxtUrl = new URL(normalized).toString();
+          const targetOrigin = new URL(url).origin;
+          if (new URL(llmsTxtUrl).origin !== targetOrigin) {
+            process.stderr.write(
+              `Warning: --llms-txt-url origin (${new URL(llmsTxtUrl).origin}) differs from target origin (${targetOrigin}). The flag will still be used as canonical.\n`,
+            );
+          }
         } catch {
-          process.stderr.write(`Error: Invalid --llms-txt-url "${rawLlmsTxtUrl}".\n`);
-          process.exitCode = 1;
-          return;
-        }
-        const targetOrigin = new URL(url).origin;
-        if (new URL(llmsTxtUrl).origin !== targetOrigin) {
-          process.stderr.write(
-            `Warning: --llms-txt-url origin (${new URL(llmsTxtUrl).origin}) differs from target origin (${targetOrigin}). The flag will still be used as canonical.\n`,
-          );
+          llmsTxtUrl = normalized;
         }
       }
 
@@ -279,7 +262,7 @@ export function registerCheckCommand(program: Command): void {
               .filter(Boolean)
           : (config?.options?.parityExclusions ?? undefined);
 
-      const report = await runChecks(url, {
+      const runnerOptions: Partial<RunnerOptions> = {
         checkIds,
         skipCheckIds,
         maxConcurrency,
@@ -301,7 +284,21 @@ export function registerCheckCommand(program: Command): void {
         ...(parityPassThreshold != null && { parityPassThreshold }),
         ...(parityWarnThreshold != null && { parityWarnThreshold }),
         ...(parityExclusions && { parityExclusions }),
-      });
+      };
+
+      const validation = validateRunnerOptions(runnerOptions);
+      if (!validation.valid) {
+        for (const err of validation.errors) {
+          process.stderr.write(`Error: ${err.message}\n`);
+        }
+        process.exitCode = 1;
+        return;
+      }
+      for (const warn of validation.warnings) {
+        process.stderr.write(`Warning: ${warn.message}\n`);
+      }
+
+      const report = await runChecks(url, runnerOptions);
 
       let output: string;
       if (format === 'json') {
diff --git a/src/constants.ts b/src/constants.ts
index af064b6..9205f20 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -1,4 +1,11 @@
-import type { CheckOptions, SizeThresholds } from './types.js';
+import type { CheckOptions, SamplingStrategy, SizeThresholds } from './types.js';
+
+export const VALID_SAMPLING_STRATEGIES: readonly SamplingStrategy[] = [
+  'random',
+  'deterministic',
+  'curated',
+  'none',
+];
 
 export const DEFAULT_THRESHOLDS: SizeThresholds = {
   pass: 50_000,
diff --git a/src/helpers/config.ts b/src/helpers/config.ts
index 1e12ce4..2f21f72 100644
--- a/src/helpers/config.ts
+++ b/src/helpers/config.ts
@@ -2,6 +2,8 @@ import { readFile } from 'node:fs/promises';
 import { dirname, resolve } from 'node:path';
 import { parse as parseYaml } from 'yaml';
 import type { AgentDocsConfig, PageConfigEntry } from '../types.js';
+import { validateNumber } from '../validation.js';
+import { VALID_SAMPLING_STRATEGIES } from '../constants.js';
 
 const CONFIG_FILENAMES = ['agent-docs.config.yml', 'agent-docs.config.yaml'];
 
@@ -58,6 +60,17 @@ function validateStringArray(value: unknown, field: string, source: string): voi
   }
 }
 
+const NUMERIC_OPTION_RULES: [string, { integer?: boolean; min?: number; max?: number }][] = [
+  ['maxConcurrency', { integer: true, min: 1, max: 100 }],
+  ['requestDelay', { integer: true, min: 0 }],
+  ['requestTimeout', { integer: true, min: 0 }],
+  ['maxLinksToTest', { integer: true, min: 1 }],
+  ['coveragePassThreshold', { integer: true, min: 0, max: 100 }],
+  ['coverageWarnThreshold', { integer: true, min: 0, max: 100 }],
+  ['parityPassThreshold', { integer: true, min: 0, max: 100 }],
+  ['parityWarnThreshold', { integer: true, min: 0, max: 100 }],
+];
+
 function validateOptions(options: Record<string, unknown>, source: string): void {
   if (options.coverageExclusions != null) {
     validateStringArray(options.coverageExclusions, 'options.coverageExclusions', source);
@@ -65,6 +78,57 @@ function validateOptions(options: Record<string, unknown>, source: string): void
   if (options.parityExclusions != null) {
     validateStringArray(options.parityExclusions, 'options.parityExclusions', source);
   }
+  if (
+    options.samplingStrategy != null &&
+    !VALID_SAMPLING_STRATEGIES.includes(
+      options.samplingStrategy as string as (typeof VALID_SAMPLING_STRATEGIES)[number],
+    )
+  ) {
+    throw new Error(
+      `${source}: options.samplingStrategy must be one of: ${VALID_SAMPLING_STRATEGIES.join(', ')}`,
+    );
+  }
+  for (const [field, constraints] of NUMERIC_OPTION_RULES) {
+    if (options[field] != null) {
+      const issue = validateNumber(options[field], `options.${field}`, constraints);
+      if (issue) {
+        throw new Error(`${source}: ${issue.message}`);
+      }
+    }
+  }
+  if (options.thresholds != null) {
+    const thresholds = options.thresholds as Record<string, unknown>;
+    if (thresholds.pass != null) {
+      const issue = validateNumber(thresholds.pass, 'options.thresholds.pass', {
+        integer: true,
+        min: 1,
+      });
+      if (issue) throw new Error(`${source}: ${issue.message}`);
+    }
+    if (thresholds.fail != null) {
+      const issue = validateNumber(thresholds.fail, 'options.thresholds.fail', {
+        integer: true,
+        min: 1,
+      });
+      if (issue) throw new Error(`${source}: ${issue.message}`);
+    }
+  }
+}
+
+function validateConfig(parsed: AgentDocsConfig, source: string): void {
+  if (parsed.checks != null) {
+    validateStringArray(parsed.checks, 'checks', source);
+  }
+  if (parsed.skipChecks != null) {
+    validateStringArray(parsed.skipChecks, 'skipChecks', source);
+  }
+  if (parsed.pages) {
+    assertPagesArray(parsed.pages, source);
+    validatePages(parsed.pages, source);
+  }
+  if (parsed.options) {
+    validateOptions(parsed.options as Record<string, unknown>, source);
+  }
 }
 
 /**
@@ -84,13 +148,7 @@ export async function loadConfig(dir?: string): Promise<AgentDocsConfig> {
         if (!parsed.url) {
           throw new Error(`Config file ${filepath} is missing required "url" field`);
         }
-        if (parsed.pages) {
-          assertPagesArray(parsed.pages, filepath);
-          validatePages(parsed.pages, filepath);
-        }
-        if (parsed.options) {
-          validateOptions(parsed.options as Record<string, unknown>, filepath);
-        }
+        validateConfig(parsed, filepath);
         return parsed;
       } catch (err) {
         if ((err as NodeJS.ErrnoException).code === 'ENOENT') continue;
@@ -123,13 +181,7 @@ export async function findConfig(
     const filepath = resolve(process.cwd(), explicitPath);
     const content = await readFile(filepath, 'utf-8');
     const parsed = parseYaml(content) as AgentDocsConfig;
-    if (parsed.pages) {
-      assertPagesArray(parsed.pages, filepath);
-      validatePages(parsed.pages, filepath);
-    }
-    if (parsed.options) {
-      validateOptions(parsed.options as Record<string, unknown>, filepath);
-    }
+    validateConfig(parsed, filepath);
     return parsed;
   }
 
@@ -140,13 +192,7 @@ export async function findConfig(
       try {
         const content = await readFile(filepath, 'utf-8');
         const parsed = parseYaml(content) as AgentDocsConfig;
-        if (parsed.pages) {
-          assertPagesArray(parsed.pages, filepath);
-          validatePages(parsed.pages, filepath);
-        }
-        if (parsed.options) {
-          validateOptions(parsed.options as Record<string, unknown>, filepath);
-        }
+        validateConfig(parsed, filepath);
         return parsed;
       } catch (err) {
         if ((err as NodeJS.ErrnoException).code === 'ENOENT') continue;
diff --git a/src/index.ts b/src/index.ts
index 06942b7..4c22207 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -16,7 +16,14 @@ export type {
   PageConfigEntry,
 } from './types.js';
 
-export { DEFAULT_OPTIONS, DEFAULT_THRESHOLDS, CATEGORIES } from './constants.js';
+export {
+  DEFAULT_OPTIONS,
+  DEFAULT_THRESHOLDS,
+  CATEGORIES,
+  VALID_SAMPLING_STRATEGIES,
+} from './constants.js';
+export { validateRunnerOptions } from './validation.js';
+export type { ValidationResult, ValidationIssue } from './validation.js';
 export { createContext, normalizeUrl, runChecks } from './runner.js';
 export { createHttpClient } from './http.js';
 export { getAllChecks, getCheck, getChecksSorted, extractMarkdownLinks } from './checks/index.js';
diff --git a/src/runner.ts b/src/runner.ts
index 1745dec..8bdd0ec 100644
--- a/src/runner.ts
+++ b/src/runner.ts
@@ -2,6 +2,7 @@ import type { CheckContext, CheckResult, RunnerOptions, ReportResult } from './t
 import { DEFAULT_OPTIONS, SPEC_BASE_URL } from './constants.js';
 import { createHttpClient } from './http.js';
 import { getChecksSorted } from './checks/registry.js';
+import { validateRunnerOptions } from './validation.js';
 
 /**
  * Normalize dependsOn to the internal format: array of OR-groups.
@@ -41,6 +42,20 @@ export function normalizeUrl(url: string): string {
 }
 
 export function createContext(baseUrl: string, options?: Partial<RunnerOptions>): CheckContext {
+  if (options) {
+    if (options.canonicalOrigin) {
+      options = { ...options, canonicalOrigin: normalizeUrl(options.canonicalOrigin) };
+    }
+    if (options.llmsTxtUrl) {
+      options = { ...options, llmsTxtUrl: normalizeUrl(options.llmsTxtUrl) };
+    }
+    const validation = validateRunnerOptions(options);
+    if (!validation.valid) {
+      const messages = validation.errors.map((e) => `${e.field}: ${e.message}`);
+      throw new Error(`Invalid options: ${messages.join('; ')}`);
+    }
+  }
+
   const merged = { ...DEFAULT_OPTIONS, ...options };
   baseUrl = normalizeUrl(baseUrl);
   const url = new URL(baseUrl);
diff --git a/src/validation.ts b/src/validation.ts
new file mode 100644
index 0000000..4d814d2
--- /dev/null
+++ b/src/validation.ts
@@ -0,0 +1,219 @@
+import type { RunnerOptions, SamplingStrategy } from './types.js';
+import { VALID_SAMPLING_STRATEGIES } from './constants.js';
+import { getAllChecks } from './checks/registry.js';
+
+export interface ValidationIssue {
+  field: string;
+  message: string;
+}
+
+export interface ValidationResult {
+  valid: boolean;
+  errors: ValidationIssue[];
+  warnings: ValidationIssue[];
+}
+
+export interface NumericConstraints {
+  integer?: boolean;
+  min?: number;
+  max?: number;
+}
+
+export function validateNumber(
+  value: unknown,
+  field: string,
+  constraints: NumericConstraints,
+): ValidationIssue | null {
+  if (value === undefined || value === null) return null;
+  if (typeof value !== 'number' || Number.isNaN(value)) {
+    return {
+      field,
+      message: `${field} must be a valid number, got ${typeof value === 'number' ? 'NaN' : `${typeof value} "${value}"`}`,
+    };
+  }
+  if (constraints.integer && !Number.isInteger(value)) {
+    return { field, message: `${field} must be an integer, got ${value}` };
+  }
+  if (constraints.min !== undefined && value < constraints.min) {
+    const bound =
+      constraints.max !== undefined
+        ? `between ${constraints.min} and ${constraints.max}`
+        : `at least ${constraints.min}`;
+    return { field, message: `${field} must be ${bound}, got ${value}` };
+  }
+  if (constraints.max !== undefined && value > constraints.max) {
+    const bound =
+      constraints.min !== undefined
+        ? `between ${constraints.min} and ${constraints.max}`
+        : `at most ${constraints.max}`;
+    return { field, message: `${field} must be ${bound}, got ${value}` };
+  }
+  return null;
+}
+
+function validateUrl(value: unknown, field: string): ValidationIssue | null {
+  if (value === undefined || value === null) return null;
+  if (typeof value !== 'string') {
+    return { field, message: `${field} must be a string` };
+  }
+  try {
+    new URL(value);
+    return null;
+  } catch {
+    return { field, message: `${field} is not a valid URL: "${value}"` };
+  }
+}
+
+export function validateRunnerOptions(options: Partial<RunnerOptions>): ValidationResult {
+  const errors: ValidationIssue[] = [];
+  const warnings: ValidationIssue[] = [];
+
+  const pushError = (issue: ValidationIssue | null) => {
+    if (issue) errors.push(issue);
+  };
+
+  // Numeric range validations
+  pushError(
+    validateNumber(options.maxConcurrency, 'maxConcurrency', { integer: true, min: 1, max: 100 }),
+  );
+  pushError(validateNumber(options.requestDelay, 'requestDelay', { integer: true, min: 0 }));
+  pushError(validateNumber(options.requestTimeout, 'requestTimeout', { integer: true, min: 0 }));
+  pushError(validateNumber(options.maxLinksToTest, 'maxLinksToTest', { integer: true, min: 1 }));
+
+  if (options.thresholds) {
+    pushError(
+      validateNumber(options.thresholds.pass, 'thresholds.pass', { integer: true, min: 1 }),
+    );
+    pushError(
+      validateNumber(options.thresholds.fail, 'thresholds.fail', { integer: true, min: 1 }),
+    );
+  }
+
+  pushError(
+    validateNumber(options.coveragePassThreshold, 'coveragePassThreshold', {
+      integer: true,
+      min: 0,
+      max: 100,
+    }),
+  );
+  pushError(
+    validateNumber(options.coverageWarnThreshold, 'coverageWarnThreshold', {
+      integer: true,
+      min: 0,
+      max: 100,
+    }),
+  );
+  pushError(
+    validateNumber(options.parityPassThreshold, 'parityPassThreshold', {
+      integer: true,
+      min: 0,
+      max: 100,
+    }),
+  );
+  pushError(
+    validateNumber(options.parityWarnThreshold, 'parityWarnThreshold', {
+      integer: true,
+      min: 0,
+      max: 100,
+    }),
+  );
+
+  // Threshold ordering (only when both in a pair are provided and individually valid)
+  if (
+    options.thresholds &&
+    typeof options.thresholds.pass === 'number' &&
+    typeof options.thresholds.fail === 'number' &&
+    !Number.isNaN(options.thresholds.pass) &&
+    !Number.isNaN(options.thresholds.fail) &&
+    options.thresholds.pass > options.thresholds.fail
+  ) {
+    errors.push({
+      field: 'thresholds',
+      message: `thresholds.pass (${options.thresholds.pass}) must be less than or equal to thresholds.fail (${options.thresholds.fail})`,
+    });
+  }
+
+  if (
+    options.coveragePassThreshold !== undefined &&
+    options.coverageWarnThreshold !== undefined &&
+    !Number.isNaN(options.coveragePassThreshold) &&
+    !Number.isNaN(options.coverageWarnThreshold) &&
+    options.coveragePassThreshold < options.coverageWarnThreshold
+  ) {
+    errors.push({
+      field: 'coveragePassThreshold',
+      message: `coveragePassThreshold (${options.coveragePassThreshold}) must be greater than or equal to coverageWarnThreshold (${options.coverageWarnThreshold})`,
+    });
+  }
+
+  if (
+    options.parityPassThreshold !== undefined &&
+    options.parityWarnThreshold !== undefined &&
+    !Number.isNaN(options.parityPassThreshold) &&
+    !Number.isNaN(options.parityWarnThreshold) &&
+    options.parityPassThreshold > options.parityWarnThreshold
+  ) {
+    errors.push({
+      field: 'parityPassThreshold',
+      message: `parityPassThreshold (${options.parityPassThreshold}) must be less than or equal to parityWarnThreshold (${options.parityWarnThreshold})`,
+    });
+  }
+
+  // Sampling strategy enum
+  if (
+    options.samplingStrategy !== undefined &&
+    !VALID_SAMPLING_STRATEGIES.includes(options.samplingStrategy as SamplingStrategy)
+  ) {
+    errors.push({
+      field: 'samplingStrategy',
+      message: `Invalid sampling strategy "${options.samplingStrategy}". Must be one of: ${VALID_SAMPLING_STRATEGIES.join(', ')}`,
+    });
+  }
+
+  // Curated requires pages
+  if (
+    options.samplingStrategy === 'curated' &&
+    (!options.curatedPages || options.curatedPages.length === 0)
+  ) {
+    errors.push({
+      field: 'samplingStrategy',
+      message: 'Curated sampling requires curatedPages to be non-empty',
+    });
+  }
+
+  // URL validation
+  pushError(validateUrl(options.canonicalOrigin, 'canonicalOrigin'));
+  pushError(validateUrl(options.llmsTxtUrl, 'llmsTxtUrl'));
+
+  // Check ID validation
+  if (
+    (options.checkIds && options.checkIds.length > 0) ||
+    (options.skipCheckIds && options.skipCheckIds.length > 0)
+  ) {
+    const knownIds = getAllChecks().map((c) => c.id);
+    const knownSet = new Set(knownIds);
+
+    const validateCheckId = (id: string, field: string) => {
+      if (knownSet.has(id)) return;
+      const sortedIds = [...knownIds].sort();
+      const hint = sortedIds.join(', ');
+      errors.push({
+        field,
+        message: `Unknown check ID "${id}". Available checks: ${hint}`,
+      });
+    };
+
+    if (options.checkIds) {
+      for (const id of options.checkIds) validateCheckId(id, 'checkIds');
+    }
+    if (options.skipCheckIds) {
+      for (const id of options.skipCheckIds) validateCheckId(id, 'skipCheckIds');
+    }
+  }
+
+  return {
+    valid: errors.length === 0,
+    errors,
+    warnings,
+  };
+}
diff --git a/test/unit/checks/llms-txt-coverage.test.ts b/test/unit/checks/llms-txt-coverage.test.ts
index aca7a4f..c58e891 100644
--- a/test/unit/checks/llms-txt-coverage.test.ts
+++ b/test/unit/checks/llms-txt-coverage.test.ts
@@ -798,36 +798,6 @@ describe('filterToUnprefixedLocale', () => {
 });
 
 describe('edge cases', () => {
-  test('warns when passThreshold < warnThreshold', async () => {
-    const host = 'cov-threshold-warn.local';
-    const allPages = Array.from({ length: 10 }, (_, i) => `http://${host}/docs/page-${i}`);
-    const llmsPages = allPages.slice(0, 9);
-
-    const ctx = makeCtx(host, llmsPages, '/docs');
-    ctx.options.coveragePassThreshold = 50;
-    ctx.options.coverageWarnThreshold = 80;
-
-    server.use(
-      http.get(
-        `http://${host}/robots.txt`,
-        () => new HttpResponse(`Sitemap: http://${host}/sitemap.xml`, { status: 200 }),
-      ),
-      http.get(
-        `http://${host}/sitemap.xml`,
-        () =>
-          new HttpResponse(makeSitemap(allPages), {
-            status: 200,
-            headers: { 'content-type': 'application/xml' },
-          }),
-      ),
-    );
-
-    const result = await check.run(ctx);
-    expect(result.details?.thresholdWarnings).toBeDefined();
-    const warnings = result.details?.thresholdWarnings as string[];
-    expect(warnings[0]).toContain('warn state is unreachable');
-  });
-
   test('handles malformed URLs in sitemap gracefully', async () => {
     const host = 'cov-malformed.local';
     const goodPages = [`http://${host}/docs/guide`];
diff --git a/test/unit/checks/markdown-content-parity.test.ts b/test/unit/checks/markdown-content-parity.test.ts
index c2f198b..a28c8ab 100644
--- a/test/unit/checks/markdown-content-parity.test.ts
+++ b/test/unit/checks/markdown-content-parity.test.ts
@@ -2052,32 +2052,6 @@ All requests are authenticated automatically using the configured API credential
     expect(result.details?.parityWarnThreshold).toBe(30);
   });
 
-  it('returns error when parityPassThreshold exceeds parityWarnThreshold', async () => {
-    const html =
-      '<html><body><h1>Page</h1><p>Content that matches between HTML and markdown versions exactly.</p></body></html>';
-    const markdown = '# Page\n\nContent that matches between HTML and markdown versions exactly.';
-    const url = 'http://mcp-inverted.local/docs/page';
-
-    server.use(
-      http.get(
-        url,
-        () =>
-          new HttpResponse(html, {
-            status: 200,
-            headers: { 'Content-Type': 'text/html' },
-          }),
-      ),
-    );
-
-    const ctx = makeCtx([{ url, markdown, htmlBody: html }], 'mcp-inverted.local', {
-      parityPassThreshold: 20,
-      parityWarnThreshold: 5,
-    });
-    const result = await check.run(ctx);
-    expect(result.status).toBe('error');
-    expect(result.message).toContain('greater than');
-  });
-
   it('reports a clear error for invalid CSS selectors in parityExclusions', async () => {
     const html = `<html><body><main>
       <h1>Page Title</h1>
diff --git a/test/unit/cli/check-command.test.ts b/test/unit/cli/check-command.test.ts
index 00b8858..e76f336 100644
--- a/test/unit/cli/check-command.test.ts
+++ b/test/unit/cli/check-command.test.ts
@@ -3,6 +3,7 @@ import { http, HttpResponse } from 'msw';
 import { setupServer } from 'msw/node';
 import { writeFile, mkdir, rm } from 'node:fs/promises';
 import { resolve } from 'node:path';
+import * as validationMod from '../../../src/validation.js';
 
 const VALID_LLMS_TXT = `# Test
 
@@ -423,7 +424,7 @@ describe('check command config integration', () => {
     await new Promise((r) => setTimeout(r, 100));
 
     const output = stderrSpy.mock.calls.map((c) => c[0]).join('');
-    expect(output).toContain('Curated sampling requires pages');
+    expect(output).toContain('Curated sampling requires curatedPages to be non-empty');
     expect(process.exitCode).toBe(1);
 
     stderrSpy.mockRestore();
@@ -631,7 +632,7 @@ describe('check command config integration', () => {
     await new Promise((r) => setTimeout(r, 100));
 
     const output = stderrSpy.mock.calls.map((c) => c[0]).join('');
-    expect(output).toContain('Invalid --llms-txt-url');
+    expect(output).toContain('llmsTxtUrl is not a valid URL');
     expect(process.exitCode).toBe(1);
 
     stderrSpy.mockRestore();
@@ -676,6 +677,229 @@ describe('check command config integration', () => {
     stderrSpy.mockRestore();
   });
 
+  it('errors when config file is invalid YAML', async () => {
+    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
+
+    const configPath = resolve(CONFIG_TMP, 'bad-config.yml');
+    await writeFile(configPath, ':\ninvalid: [yaml\n');
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run(['node', 'afdocs', 'check', '--config', configPath, '--request-delay', '0']);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = stderrSpy.mock.calls.map((c) => c[0]).join('');
+    expect(output).toContain('Error:');
+    expect(process.exitCode).toBe(1);
+
+    stderrSpy.mockRestore();
+  });
+
+  it('parses --skip-checks flag', async () => {
+    server.use(
+      http.get('http://cmd-skip.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
+      http.get(
+        'http://cmd-skip.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run([
+      'node',
+      'afdocs',
+      'check',
+      'http://cmd-skip.local',
+      '--checks',
+      'llms-txt-exists,llms-txt-valid',
+      '--skip-checks',
+      'llms-txt-valid',
+      '--format',
+      'json',
+      '--request-delay',
+      '0',
+    ]);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = writeSpy.mock.calls.map((c) => c[0]).join('');
+    const parsed = JSON.parse(output.trim());
+    const skipped = parsed.results.find((r: { id: string }) => r.id === 'llms-txt-valid');
+    expect(skipped.status).toBe('skip');
+
+    writeSpy.mockRestore();
+  });
+
+  it('warns when --canonical-origin matches target origin', async () => {
+    server.use(
+      http.get('http://cmd-canon-same.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
+      http.get(
+        'http://cmd-canon-same.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run([
+      'node',
+      'afdocs',
+      'check',
+      'http://cmd-canon-same.local',
+      '--canonical-origin',
+      'http://cmd-canon-same.local',
+      '--checks',
+      'llms-txt-exists',
+      '--request-delay',
+      '0',
+    ]);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const stderr = stderrSpy.mock.calls.map((c) => c[0]).join('');
+    expect(stderr).toContain('same as the target origin');
+    expect(stderr).toContain('no effect');
+
+    stdoutSpy.mockRestore();
+    stderrSpy.mockRestore();
+  });
+
+  it('falls through invalid --canonical-origin to validation', async () => {
+    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run([
+      'node',
+      'afdocs',
+      'check',
+      'http://cmd-canon-bad.local',
+      '--canonical-origin',
+      ':::not-a-url:::',
+      '--checks',
+      'llms-txt-exists',
+      '--request-delay',
+      '0',
+    ]);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = stderrSpy.mock.calls.map((c) => c[0]).join('');
+    expect(output).toContain('canonicalOrigin');
+    expect(output).toContain('not a valid URL');
+    expect(process.exitCode).toBe(1);
+
+    stderrSpy.mockRestore();
+  });
+
+  it('passes --coverage-exclusions to runner', async () => {
+    server.use(
+      http.get('http://cmd-cov-excl.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
+      http.get(
+        'http://cmd-cov-excl.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run([
+      'node',
+      'afdocs',
+      'check',
+      'http://cmd-cov-excl.local',
+      '--checks',
+      'llms-txt-exists',
+      '--coverage-exclusions',
+      '/docs/ref/**,/docs/changelog/**',
+      '--format',
+      'json',
+      '--request-delay',
+      '0',
+    ]);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = writeSpy.mock.calls.map((c) => c[0]).join('');
+    const parsed = JSON.parse(output.trim());
+    expect(parsed.results[0].status).toBe('pass');
+
+    writeSpy.mockRestore();
+  });
+
+  it('passes --parity-exclusions to runner', async () => {
+    server.use(
+      http.get('http://cmd-par-excl.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
+      http.get(
+        'http://cmd-par-excl.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run([
+      'node',
+      'afdocs',
+      'check',
+      'http://cmd-par-excl.local',
+      '--checks',
+      'llms-txt-exists',
+      '--parity-exclusions',
+      '.nav-content,[data-human-only]',
+      '--format',
+      'json',
+      '--request-delay',
+      '0',
+    ]);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = writeSpy.mock.calls.map((c) => c[0]).join('');
+    const parsed = JSON.parse(output.trim());
+    expect(parsed.results[0].status).toBe('pass');
+
+    writeSpy.mockRestore();
+  });
+
+  it('displays validation warnings on stderr', async () => {
+    server.use(
+      http.get('http://cmd-warn.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
+      http.get(
+        'http://cmd-warn.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const spy = vi.spyOn(validationMod, 'validateRunnerOptions').mockReturnValueOnce({
+      valid: true,
+      errors: [],
+      warnings: [{ field: 'testField', message: 'This is a test warning' }],
+    });
+
+    const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run([
+      'node',
+      'afdocs',
+      'check',
+      'http://cmd-warn.local',
+      '--checks',
+      'llms-txt-exists',
+      '--request-delay',
+      '0',
+    ]);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const stderr = stderrSpy.mock.calls.map((c) => c[0]).join('');
+    expect(stderr).toContain('Warning: This is a test warning');
+
+    spy.mockRestore();
+    stdoutSpy.mockRestore();
+    stderrSpy.mockRestore();
+  });
+
   it('infers base URL from config pages when url field is omitted', async () => {
     server.use(
       http.get('http://cfg-infer.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
diff --git a/test/unit/helpers/config.test.ts b/test/unit/helpers/config.test.ts
index b34dc04..f89f270 100644
--- a/test/unit/helpers/config.test.ts
+++ b/test/unit/helpers/config.test.ts
@@ -247,6 +247,25 @@ describe('findConfig', () => {
     await expect(findConfig(configPath)).rejects.toThrow('pages[0] must be a URL string or');
   });
 
+  it('throws on invalid URL in pages object url field', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const configPath = resolve(TMP_DIR, 'bad-obj-url.yml');
+    await writeFile(configPath, 'url: https://example.com\npages:\n  - url: not-a-valid-url\n');
+
+    await expect(findConfig(configPath)).rejects.toThrow('pages[0].url is not a valid URL');
+  });
+
+  it('throws on non-string tag in pages object entry', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const configPath = resolve(TMP_DIR, 'bad-tag.yml');
+    await writeFile(
+      configPath,
+      'url: https://example.com\npages:\n  - url: https://example.com/a\n    tag: 42\n',
+    );
+
+    await expect(findConfig(configPath)).rejects.toThrow('pages[0].tag must be a string');
+  });
+
   it('throws when pages is a scalar instead of an array', async () => {
     await mkdir(TMP_DIR, { recursive: true });
     const configPath = resolve(TMP_DIR, 'scalar-pages.yml');
@@ -338,4 +357,149 @@ describe('findConfig', () => {
 
     await expect(loadConfig(TMP_DIR)).rejects.toThrow('parityExclusions[0] must be a string');
   });
+
+  it('throws on invalid numeric option in config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'options:', '  maxConcurrency: -5', ''].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow('options.maxConcurrency');
+  });
+
+  it('throws on non-numeric option value in config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'options:', '  maxConcurrency: fast', ''].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow('options.maxConcurrency');
+  });
+
+  it('throws on out-of-range coverage threshold in config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'options:', '  coveragePassThreshold: 150', ''].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow('options.coveragePassThreshold');
+  });
+
+  it('accepts valid numeric options in config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      [
+        'url: https://example.com',
+        'options:',
+        '  maxConcurrency: 5',
+        '  requestDelay: 100',
+        '  coveragePassThreshold: 95',
+        '',
+      ].join('\n'),
+    );
+
+    const config = await findConfig(undefined, TMP_DIR);
+    expect(config?.options?.maxConcurrency).toBe(5);
+  });
+
+  it('throws on invalid samplingStrategy in config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'options:', '  samplingStrategy: fastt', ''].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow('samplingStrategy');
+  });
+
+  it('accepts valid samplingStrategy in config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'options:', '  samplingStrategy: deterministic', ''].join('\n'),
+    );
+
+    const config = await findConfig(undefined, TMP_DIR);
+    expect(config?.options?.samplingStrategy).toBe('deterministic');
+  });
+
+  it('throws when checks is not a string array', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'checks:', '  - 42', ''].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow('checks[0] must be a string');
+  });
+
+  it('throws when skipChecks is not a string array', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'skipChecks: true', ''].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow(
+      'skipChecks" must be an array of strings',
+    );
+  });
+
+  it('validates checks in loadConfig too', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'checks: not-an-array', ''].join('\n'),
+    );
+
+    await expect(loadConfig(TMP_DIR)).rejects.toThrow('checks" must be an array of strings');
+  });
+
+  it('throws on invalid thresholds.pass in config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      [
+        'url: https://example.com',
+        'options:',
+        '  thresholds:',
+        '    pass: -1',
+        '    fail: 100000',
+        '',
+      ].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow('options.thresholds.pass');
+  });
+
+  it('throws on invalid thresholds.fail in config', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      [
+        'url: https://example.com',
+        'options:',
+        '  thresholds:',
+        '    pass: 50000',
+        '    fail: 0',
+        '',
+      ].join('\n'),
+    );
+
+    await expect(findConfig(undefined, TMP_DIR)).rejects.toThrow('options.thresholds.fail');
+  });
+
+  it('validates skipChecks in loadConfig', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(
+      resolve(TMP_DIR, 'agent-docs.config.yml'),
+      ['url: https://example.com', 'skipChecks:', '  - 42', ''].join('\n'),
+    );
+
+    await expect(loadConfig(TMP_DIR)).rejects.toThrow('skipChecks[0] must be a string');
+  });
 });
diff --git a/test/unit/helpers/get-page-urls.test.ts b/test/unit/helpers/get-page-urls.test.ts
index c04ade9..22322aa 100644
--- a/test/unit/helpers/get-page-urls.test.ts
+++ b/test/unit/helpers/get-page-urls.test.ts
@@ -2037,11 +2037,12 @@ describe('discoverAndSamplePages', () => {
   });
 
   it('curated strategy returns configured URLs without discovery', async () => {
+    const curatedPages = ['http://curated.local/page-a', 'http://curated.local/page-b'];
     const ctx = createContext('http://curated.local', {
       requestDelay: 0,
       samplingStrategy: 'curated',
+      curatedPages,
     });
-    ctx._curatedPages = ['http://curated.local/page-a', 'http://curated.local/page-b'];
 
     const result = await discoverAndSamplePages(ctx);
     expect(result.urls).toEqual(['http://curated.local/page-a', 'http://curated.local/page-b']);
@@ -2051,15 +2052,16 @@ describe('discoverAndSamplePages', () => {
   });
 
   it('curated strategy with tagged objects populates urlTags', async () => {
-    const ctx = createContext('http://curated-tags.local', {
-      requestDelay: 0,
-      samplingStrategy: 'curated',
-    });
-    ctx._curatedPages = [
+    const curatedPages = [
       'http://curated-tags.local/page-a',
       { url: 'http://curated-tags.local/page-b', tag: 'api' },
       { url: 'http://curated-tags.local/page-c', tag: 'guides' },
     ];
+    const ctx = createContext('http://curated-tags.local', {
+      requestDelay: 0,
+      samplingStrategy: 'curated',
+      curatedPages,
+    });
 
     const result = await discoverAndSamplePages(ctx);
     expect(result.urls).toHaveLength(3);
@@ -2069,17 +2071,14 @@ describe('discoverAndSamplePages', () => {
     });
   });
 
-  it('curated strategy with empty pages falls back to baseUrl', async () => {
-    const ctx = createContext('http://curated-empty.local', {
-      requestDelay: 0,
-      samplingStrategy: 'curated',
-    });
-    ctx._curatedPages = [];
-
-    const result = await discoverAndSamplePages(ctx);
-    expect(result.urls).toEqual(['http://curated-empty.local']);
-    expect(result.warnings).toHaveLength(1);
-    expect(result.warnings[0]).toContain('no pages defined');
+  it('curated strategy with empty pages throws validation error', () => {
+    expect(() =>
+      createContext('http://curated-empty.local', {
+        requestDelay: 0,
+        samplingStrategy: 'curated',
+        curatedPages: [],
+      }),
+    ).toThrow('Curated sampling requires curatedPages to be non-empty');
   });
 
   it('curated strategy does not apply maxLinksToTest', async () => {
@@ -2088,8 +2087,8 @@ describe('discoverAndSamplePages', () => {
       requestDelay: 0,
       samplingStrategy: 'curated',
       maxLinksToTest: 5,
+      curatedPages: urls,
     });
-    ctx._curatedPages = urls;
 
     const result = await discoverAndSamplePages(ctx);
     expect(result.urls).toHaveLength(100);
diff --git a/test/unit/runner.test.ts b/test/unit/runner.test.ts
index 867d76a..80b7fab 100644
--- a/test/unit/runner.test.ts
+++ b/test/unit/runner.test.ts
@@ -73,6 +73,26 @@ describe('createContext URL normalization', () => {
     const ctx = createContext('https://example.com');
     expect(ctx._curatedPages).toBeUndefined();
   });
+
+  it('throws on invalid options', () => {
+    expect(() => createContext('https://example.com', { maxConcurrency: -1 })).toThrow(
+      'Invalid options',
+    );
+  });
+
+  it('normalizes bare domain in canonicalOrigin', () => {
+    const ctx = createContext('https://preview.example.com', {
+      canonicalOrigin: 'example.com',
+    });
+    expect(ctx.options.canonicalOrigin).toBe('https://example.com');
+  });
+
+  it('normalizes bare domain in llmsTxtUrl', () => {
+    const ctx = createContext('https://example.com', {
+      llmsTxtUrl: 'example.com/llms.txt',
+    });
+    expect(ctx.options.llmsTxtUrl).toBe('https://example.com/llms.txt');
+  });
 });
 
 describe('runner', () => {
@@ -467,4 +487,16 @@ describe('runner', () => {
     expect(report.discoverySources).toBeDefined();
     expect(report.discoverySources).toContain('llms-txt');
   });
+
+  it('throws on invalid options', async () => {
+    await expect(runChecks('http://invalid-opts.local', { maxConcurrency: -1 })).rejects.toThrow(
+      'Invalid options',
+    );
+  });
+
+  it('throws on unknown check IDs', async () => {
+    await expect(
+      runChecks('http://invalid-opts.local', { checkIds: ['nonexistent-check'] }),
+    ).rejects.toThrow('Unknown check ID');
+  });
 });
diff --git a/test/unit/validation.test.ts b/test/unit/validation.test.ts
new file mode 100644
index 0000000..55934be
--- /dev/null
+++ b/test/unit/validation.test.ts
@@ -0,0 +1,349 @@
+import { describe, it, expect } from 'vitest';
+import { validateRunnerOptions } from '../../src/validation.js';
+import type { RunnerOptions } from '../../src/types.js';
+
+// Ensure all checks are registered for check ID validation
+import '../../src/checks/index.js';
+
+describe('validateRunnerOptions', () => {
+  it('returns valid for empty options', () => {
+    const result = validateRunnerOptions({});
+    expect(result.valid).toBe(true);
+    expect(result.errors).toHaveLength(0);
+    expect(result.warnings).toHaveLength(0);
+  });
+
+  it('returns valid for well-formed options', () => {
+    const result = validateRunnerOptions({
+      maxConcurrency: 5,
+      requestDelay: 100,
+      maxLinksToTest: 25,
+      samplingStrategy: 'random',
+      thresholds: { pass: 50_000, fail: 100_000 },
+      coveragePassThreshold: 95,
+      coverageWarnThreshold: 80,
+      parityPassThreshold: 5,
+      parityWarnThreshold: 20,
+    });
+    expect(result.valid).toBe(true);
+    expect(result.errors).toHaveLength(0);
+  });
+
+  it('collects multiple errors in a single call', () => {
+    const result = validateRunnerOptions({
+      maxConcurrency: -1,
+      requestDelay: -1,
+      samplingStrategy: 'invalid' as RunnerOptions['samplingStrategy'],
+    });
+    expect(result.valid).toBe(false);
+    expect(result.errors.length).toBeGreaterThanOrEqual(3);
+  });
+
+  describe('NaN checking', () => {
+    const nanFields: [string, Partial<RunnerOptions>][] = [
+      ['maxConcurrency', { maxConcurrency: NaN }],
+      ['requestDelay', { requestDelay: NaN }],
+      ['requestTimeout', { requestTimeout: NaN }],
+      ['maxLinksToTest', { maxLinksToTest: NaN }],
+      ['coveragePassThreshold', { coveragePassThreshold: NaN }],
+      ['coverageWarnThreshold', { coverageWarnThreshold: NaN }],
+      ['parityPassThreshold', { parityPassThreshold: NaN }],
+      ['parityWarnThreshold', { parityWarnThreshold: NaN }],
+    ];
+
+    for (const [field, opts] of nanFields) {
+      it(`rejects NaN ${field}`, () => {
+        const result = validateRunnerOptions(opts);
+        expect(result.valid).toBe(false);
+        expect(result.errors.some((e) => e.field === field)).toBe(true);
+      });
+    }
+
+    it('rejects NaN thresholds.pass', () => {
+      const result = validateRunnerOptions({ thresholds: { pass: NaN, fail: 100_000 } });
+      expect(result.valid).toBe(false);
+      expect(result.errors.some((e) => e.field === 'thresholds.pass')).toBe(true);
+    });
+
+    it('rejects NaN thresholds.fail', () => {
+      const result = validateRunnerOptions({ thresholds: { pass: 50_000, fail: NaN } });
+      expect(result.valid).toBe(false);
+      expect(result.errors.some((e) => e.field === 'thresholds.fail')).toBe(true);
+    });
+  });
+
+  describe('numeric range: maxConcurrency', () => {
+    it('rejects 0', () => {
+      const result = validateRunnerOptions({ maxConcurrency: 0 });
+      expect(result.valid).toBe(false);
+      expect(result.errors[0].field).toBe('maxConcurrency');
+    });
+
+    it('rejects negative values', () => {
+      const result = validateRunnerOptions({ maxConcurrency: -5 });
+      expect(result.valid).toBe(false);
+    });
+
+    it('accepts 1', () => {
+      expect(validateRunnerOptions({ maxConcurrency: 1 }).valid).toBe(true);
+    });
+
+    it('accepts 100', () => {
+      expect(validateRunnerOptions({ maxConcurrency: 100 }).valid).toBe(true);
+    });
+
+    it('rejects 101', () => {
+      const result = validateRunnerOptions({ maxConcurrency: 101 });
+      expect(result.valid).toBe(false);
+      expect(result.errors[0].message).toContain('between 1 and 100');
+    });
+
+    it('rejects non-integer', () => {
+      const result = validateRunnerOptions({ maxConcurrency: 3.5 });
+      expect(result.valid).toBe(false);
+      expect(result.errors[0].message).toContain('integer');
+    });
+  });
+
+  describe('numeric range: requestDelay', () => {
+    it('accepts 0', () => {
+      expect(validateRunnerOptions({ requestDelay: 0 }).valid).toBe(true);
+    });
+
+    it('rejects negative values', () => {
+      const result = validateRunnerOptions({ requestDelay: -1 });
+      expect(result.valid).toBe(false);
+      expect(result.errors[0].field).toBe('requestDelay');
+    });
+  });
+
+  describe('numeric range: maxLinksToTest', () => {
+    it('rejects 0', () => {
+      const result = validateRunnerOptions({ maxLinksToTest: 0 });
+      expect(result.valid).toBe(false);
+    });
+
+    it('accepts 1', () => {
+      expect(validateRunnerOptions({ maxLinksToTest: 1 }).valid).toBe(true);
+    });
+  });
+
+  describe('numeric range: size thresholds', () => {
+    it('rejects thresholds.pass < 1', () => {
+      const result = validateRunnerOptions({ thresholds: { pass: 0, fail: 100_000 } });
+      expect(result.valid).toBe(false);
+      expect(result.errors.some((e) => e.field === 'thresholds.pass')).toBe(true);
+    });
+
+    it('rejects thresholds.fail < 1', () => {
+      const result = validateRunnerOptions({ thresholds: { pass: 50_000, fail: 0 } });
+      expect(result.valid).toBe(false);
+      expect(result.errors.some((e) => e.field === 'thresholds.fail')).toBe(true);
+    });
+  });
+
+  describe('numeric range: coverage thresholds', () => {
+    it('rejects coveragePassThreshold > 100', () => {
+      const result = validateRunnerOptions({ coveragePassThreshold: 101 });
+      expect(result.valid).toBe(false);
+    });
+
+    it('rejects coveragePassThreshold < 0', () => {
+      const result = validateRunnerOptions({ coveragePassThreshold: -1 });
+      expect(result.valid).toBe(false);
+    });
+
+    it('accepts boundary values 0 and 100', () => {
+      expect(validateRunnerOptions({ coveragePassThreshold: 0 }).valid).toBe(true);
+      expect(validateRunnerOptions({ coveragePassThreshold: 100 }).valid).toBe(true);
+    });
+  });
+
+  describe('numeric range: parity thresholds', () => {
+    it('rejects parityPassThreshold > 100', () => {
+      const result = validateRunnerOptions({ parityPassThreshold: 101 });
+      expect(result.valid).toBe(false);
+    });
+
+    it('rejects parityWarnThreshold < 0', () => {
+      const result = validateRunnerOptions({ parityWarnThreshold: -1 });
+      expect(result.valid).toBe(false);
+    });
+  });
+
+  describe('threshold ordering: size', () => {
+    it('errors when pass > fail', () => {
+      const result = validateRunnerOptions({ thresholds: { pass: 200_000, fail: 100_000 } });
+      expect(result.valid).toBe(false);
+      expect(result.errors.some((e) => e.field === 'thresholds')).toBe(true);
+    });
+
+    it('accepts pass === fail', () => {
+      expect(validateRunnerOptions({ thresholds: { pass: 50_000, fail: 50_000 } }).valid).toBe(
+        true,
+      );
+    });
+
+    it('accepts pass < fail', () => {
+      expect(validateRunnerOptions({ thresholds: { pass: 50_000, fail: 100_000 } }).valid).toBe(
+        true,
+      );
+    });
+  });
+
+  describe('threshold ordering: coverage', () => {
+    it('errors when pass < warn', () => {
+      const result = validateRunnerOptions({
+        coveragePassThreshold: 50,
+        coverageWarnThreshold: 80,
+      });
+      expect(result.valid).toBe(false);
+      expect(result.errors.some((e) => e.field === 'coveragePassThreshold')).toBe(true);
+      expect(result.errors[0].message).toContain('greater than or equal to');
+    });
+
+    it('accepts pass > warn', () => {
+      expect(
+        validateRunnerOptions({ coveragePassThreshold: 95, coverageWarnThreshold: 80 }).valid,
+      ).toBe(true);
+    });
+
+    it('accepts pass === warn', () => {
+      expect(
+        validateRunnerOptions({ coveragePassThreshold: 80, coverageWarnThreshold: 80 }).valid,
+      ).toBe(true);
+    });
+
+    it('skips ordering check when only pass provided', () => {
+      expect(validateRunnerOptions({ coveragePassThreshold: 50 }).valid).toBe(true);
+    });
+
+    it('skips ordering check when only warn provided', () => {
+      expect(validateRunnerOptions({ coverageWarnThreshold: 80 }).valid).toBe(true);
+    });
+  });
+
+  describe('threshold ordering: parity', () => {
+    it('errors when pass > warn', () => {
+      const result = validateRunnerOptions({
+        parityPassThreshold: 30,
+        parityWarnThreshold: 10,
+      });
+      expect(result.valid).toBe(false);
+      expect(result.errors.some((e) => e.field === 'parityPassThreshold')).toBe(true);
+      expect(result.errors[0].message).toContain('less than or equal to');
+    });
+
+    it('accepts pass < warn', () => {
+      expect(validateRunnerOptions({ parityPassThreshold: 5, parityWarnThreshold: 20 }).valid).toBe(
+        true,
+      );
+    });
+
+    it('accepts pass === warn', () => {
+      expect(
+        validateRunnerOptions({ parityPassThreshold: 10, parityWarnThreshold: 10 }).valid,
+      ).toBe(true);
+    });
+
+    it('skips ordering check when only pass provided', () => {
+      expect(validateRunnerOptions({ parityPassThreshold: 30 }).valid).toBe(true);
+    });
+  });
+
+  describe('enum: samplingStrategy', () => {
+    it('rejects invalid strategy', () => {
+      const result = validateRunnerOptions({
+        samplingStrategy: 'invalid' as RunnerOptions['samplingStrategy'],
+      });
+      expect(result.valid).toBe(false);
+      expect(result.errors[0].field).toBe('samplingStrategy');
+      expect(result.errors[0].message).toContain('random');
+    });
+
+    for (const strategy of ['random', 'deterministic', 'curated', 'none'] as const) {
+      it(`accepts "${strategy}"`, () => {
+        const opts: Partial<RunnerOptions> =
+          strategy === 'curated'
+            ? { samplingStrategy: strategy, curatedPages: ['https://example.com/a'] }
+            : { samplingStrategy: strategy };
+        expect(validateRunnerOptions(opts).valid).toBe(true);
+      });
+    }
+  });
+
+  describe('constraint: curated requires pages', () => {
+    it('errors for curated with no curatedPages', () => {
+      const result = validateRunnerOptions({ samplingStrategy: 'curated' });
+      expect(result.valid).toBe(false);
+      expect(result.errors.some((e) => e.message.includes('curatedPages'))).toBe(true);
+    });
+
+    it('errors for curated with empty curatedPages', () => {
+      const result = validateRunnerOptions({ samplingStrategy: 'curated', curatedPages: [] });
+      expect(result.valid).toBe(false);
+    });
+
+    it('accepts curated with curatedPages', () => {
+      const result = validateRunnerOptions({
+        samplingStrategy: 'curated',
+        curatedPages: ['https://example.com/a'],
+      });
+      expect(result.valid).toBe(true);
+    });
+  });
+
+  describe('URL validation', () => {
+    it('errors for invalid canonicalOrigin', () => {
+      const result = validateRunnerOptions({ canonicalOrigin: 'not a url' });
+      expect(result.valid).toBe(false);
+      expect(result.errors[0].field).toBe('canonicalOrigin');
+    });
+
+    it('accepts valid canonicalOrigin', () => {
+      expect(validateRunnerOptions({ canonicalOrigin: 'https://example.com' }).valid).toBe(true);
+    });
+
+    it('errors for invalid llmsTxtUrl', () => {
+      const result = validateRunnerOptions({ llmsTxtUrl: 'not a url' });
+      expect(result.valid).toBe(false);
+      expect(result.errors[0].field).toBe('llmsTxtUrl');
+    });
+
+    it('accepts valid llmsTxtUrl', () => {
+      expect(validateRunnerOptions({ llmsTxtUrl: 'https://example.com/llms.txt' }).valid).toBe(
+        true,
+      );
+    });
+
+    it('skips validation when undefined', () => {
+      expect(validateRunnerOptions({ canonicalOrigin: undefined }).valid).toBe(true);
+    });
+  });
+
+  describe('check IDs', () => {
+    it('errors for unknown checkIds', () => {
+      const result = validateRunnerOptions({ checkIds: ['llms-txt-exists', 'nonexistent-check'] });
+      expect(result.valid).toBe(false);
+      expect(result.errors).toHaveLength(1);
+      expect(result.errors[0].field).toBe('checkIds');
+      expect(result.errors[0].message).toContain('nonexistent-check');
+    });
+
+    it('errors for unknown skipCheckIds', () => {
+      const result = validateRunnerOptions({ skipCheckIds: ['nonexistent-check'] });
+      expect(result.valid).toBe(false);
+      expect(result.errors[0].field).toBe('skipCheckIds');
+    });
+
+    it('accepts valid checkIds', () => {
+      const result = validateRunnerOptions({ checkIds: ['llms-txt-exists'] });
+      expect(result.valid).toBe(true);
+    });
+
+    it('accepts empty checkIds', () => {
+      const result = validateRunnerOptions({ checkIds: [] });
+      expect(result.valid).toBe(true);
+    });
+  });
+});

From 1d514bc48e86bc76977a4b8b558ffc8745e30d54 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 26 Apr 2026 16:53:18 -0400
Subject: [PATCH 10/13] Update resolution text and docs recommendations to
 match updated spec language

---
 docs/about.md                   |  2 +-
 docs/checks/index.md            |  2 +-
 docs/checks/page-size.md        | 17 +++++++++--------
 docs/improve-your-score.md      |  2 +-
 docs/interaction-diagnostics.md |  4 ++--
 src/scoring/resolutions.ts      | 33 +++++++++++++++++++++------------
 6 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/docs/about.md b/docs/about.md
index a1d8ff8..05e4848 100644
--- a/docs/about.md
+++ b/docs/about.md
@@ -23,7 +23,7 @@ The [Agent-Friendly Documentation Spec](https://agentdocsspec.com) is the founda
 
 The spec is maintained at [github.com/agent-ecosystem/agent-docs-spec](https://github.com/agent-ecosystem/agent-docs-spec) and is open for contributions.
 
-AFDocs implements spec v0.3.0 (2026-03-31).
+AFDocs implements spec v0.5.0 (2026-04-25).
 
 ## Status
 
diff --git a/docs/checks/index.md b/docs/checks/index.md
index 0a0f11b..cd5c4ca 100644
--- a/docs/checks/index.md
+++ b/docs/checks/index.md
@@ -6,7 +6,7 @@ AFDocs runs 23 checks across 7 categories. Each check implements a section of th
 
 | Category                                                   | Checks | What it covers                                                       |
 | ---------------------------------------------------------- | ------ | -------------------------------------------------------------------- |
-| [Content Discoverability](/checks/content-discoverability) | 6      | Whether agents can find and navigate your documentation via llms.txt |
+| [Content Discoverability](/checks/content-discoverability) | 7      | Whether agents can find and navigate your documentation via llms.txt |
 | [Markdown Availability](/checks/markdown-availability)     | 2      | Whether agents can get documentation as markdown instead of HTML     |
 | [Page Size and Truncation Risk](/checks/page-size)         | 4      | Whether agents can process your pages without losing content         |
 | [Content Structure](/checks/content-structure)             | 3      | Whether page content is structured in ways agents can consume        |
diff --git a/docs/checks/page-size.md b/docs/checks/page-size.md
index 7c3024b..5135fcc 100644
--- a/docs/checks/page-size.md
+++ b/docs/checks/page-size.md
@@ -2,7 +2,7 @@
 
 Whether agents can process your pages without losing content. Agent platforms have diverse truncation limits, from 5K characters on some platforms to over 100K on others. Pages that exceed these limits are silently truncated: the agent sees the beginning of the page and loses the rest.
 
-This category also covers the related problem of pages that technically fit within limits but waste most of that budget on boilerplate (inline CSS, JavaScript, navigation chrome) instead of documentation content.
+This category also covers the related problem of pages that technically fit within limits but waste most of that budget on boilerplate (navigation chrome, breadcrumbs, sidebars) instead of documentation content.
 
 ## rendering-strategy
 
@@ -83,7 +83,7 @@ Character count of the HTML response and the post-conversion size when converted
 
 ### Why it matters
 
-Many agents receive HTML, either because they don't request markdown or because the server doesn't support delivering markdown when requested. When agents receive HTML, the page size that matters isn't the raw HTML; it's how large the page is after the agent's platform converts it to text. Pages with a lot of inline CSS and JavaScript may be less likely to convert cleanly to text; the version of the page that the agent "sees" may still contain a lot of inline CSS and JavaScript. This can push the actual documentation content past agent truncation limits.
+Many agents receive HTML, either because they don't request markdown or because the server doesn't support delivering markdown when requested. When agents receive HTML, the page size that matters isn't the raw HTML; it's how large the page is after the agent's platform converts it to text. Navigation boilerplate, serialized tabbed content, and deeply nested page structure can all inflate the converted output well beyond the documentation content itself. This can push the actual documentation content past agent truncation limits.
 
 AFDocs measures both the raw HTML size and the post-conversion size, and scores based on the conversion result. See [content-start-position](#content-start-position) below for more on how boilerplate affects what agents see.
 
@@ -101,13 +101,14 @@ The output also reports the conversion ratio. A page that converts from 505KB HT
 
 ### How to fix
 
-**If pages convert to too many characters**, the fix depends on where the bloat comes from:
+**If pages convert to too many characters**, review pages for reducible boilerplate (navigation, serialized tabbed content) and consider these fixes:
 
-- **Inline CSS/JS**: Move styles and scripts to external files. This is the most common cause of high boilerplate percentages.
 - **Large pages**: Break long reference pages into smaller sections.
+- **Navigation boilerplate**: Reduce navigation, sidebar, and breadcrumb markup that inflates the converted output.
 - **Tabbed content**: See [tabbed-content-serialization](/checks/content-structure#tabbed-content-serialization).
+- **Markdown alternative**: Provide markdown versions as a smaller alternative path for agents, one that bypasses the HTML conversion overhead.
 
-**If you also serve markdown**, this check matters less for agents that can request it. But most agents (4 of 6 tested) still fetch HTML, so the HTML path remains important.
+Markdown availability helps agents that request it, but most agents still fetch HTML, so fixing the HTML path remains important.
 
 ---
 
@@ -122,7 +123,7 @@ How far into the response actual documentation content begins.
 
 ### Why it matters
 
-When agents convert HTML to text, they don't always strip out the non-content parts of the page. Inline CSS and JavaScript can end up mixed in with your documentation from the agent's perspective. If enough of this boilerplate appears before your actual content, the agent may never see your documentation at all because it hits truncation limits first.
+After HTML-to-markdown conversion, boilerplate often survives. Navigation menus, breadcrumbs, sidebars, and footer content all convert to text that precedes or surrounds the actual documentation. Depending on the agent's conversion pipeline, inline CSS and JavaScript may also survive as raw text. If enough of this boilerplate appears before your actual content, the agent may never see your documentation at all because it hits truncation limits first.
 
 In observed cases, actual content didn't start until 87% through the converted page: 441,000 characters of styling code before the first paragraph of actual documentation. The agent reported seeing a documentation page _about_ CSS instead of the actual documentation content.
 
@@ -138,6 +139,6 @@ Based on where content begins in the converted output:
 
 ### How to fix
 
-**If this check warns or fails**, move inline CSS and JavaScript to external files. This is the most effective fix because external resources aren't included when agents process the page.
+**If this check warns or fails**, reduce navigation, breadcrumb, and sidebar markup that precedes the content area. These are the most common sources of boilerplate that pushes content past truncation limits.
 
-If your platform inlines critical CSS, check whether you can reduce the amount. Navigation chrome, theme variables, and third-party widget styles all contribute to the boilerplate before content.
+If your platform inlines CSS or JavaScript, check whether you can reduce the amount or move it to external files. Navigation chrome, theme variables, and third-party widget styles all contribute to the boilerplate before content.
diff --git a/docs/improve-your-score.md b/docs/improve-your-score.md
index 2227649..3e008ef 100644
--- a/docs/improve-your-score.md
+++ b/docs/improve-your-score.md
@@ -110,7 +110,7 @@ These are worth addressing but won't move the score as dramatically:
 
 - **llms.txt directive in markdown** (`llms-txt-directive-md`): Add a blockquote near the top of each markdown page pointing to your llms.txt.
 - **Content negotiation** (`content-negotiation`): Return markdown when agents send `Accept: text/markdown`. Requires server-side support.
-- **Content start position** (`content-start-position`): Reduce boilerplate (inline CSS/JS, navigation markup) before the main content. Move styles and scripts to external files.
+- **Content start position** (`content-start-position`): Reduce navigation, breadcrumb, and sidebar markup that precedes the main content area.
 - **Tabbed content** (`tabbed-content-serialization`): If tabbed UI components create oversized output, consider restructuring into separate pages or using query params to retrieve only specific tab versions.
 - **Code fence validity** (`markdown-code-fence-validity`): Fix unclosed code fences in your markdown sources.
 - **Redirect behavior** (`redirect-behavior`): Replace JavaScript and cross-host redirects with standard HTTP redirects.
diff --git a/docs/interaction-diagnostics.md b/docs/interaction-diagnostics.md
index 929a339..b4c3c25 100644
--- a/docs/interaction-diagnostics.md
+++ b/docs/interaction-diagnostics.md
@@ -68,9 +68,9 @@ These diagnostics appear in the "Interaction Diagnostics" section of the `--form
 
 **Triggers when** HTML pages exceed agent truncation limits and there's no discoverable markdown path that could offer smaller representations.
 
-**What it means**: Agents will silently receive truncated content on oversized pages, with no alternative path to the full content. This is particularly common on sites that inline large amounts of CSS and JavaScript.
+**What it means**: Agents will silently receive truncated content on oversized pages, with no alternative path to the full content.
 
-**What to do**: Either reduce HTML page sizes (break large pages into smaller ones, move inline CSS/JS to external files) or provide markdown versions and make them discoverable via content negotiation or llms.txt links. See [Page Size checks](/checks/page-size) for the specific thresholds.
+**What to do**: Either reduce HTML page sizes (break large pages into smaller ones, reduce navigation boilerplate) or provide markdown versions and make them discoverable via content negotiation or llms.txt links. See [Page Size checks](/checks/page-size) for the specific thresholds.
 
 **Score impact**: No direct score cap, but the combination of failing page-size checks with no markdown alternative typically results in low category scores for both Page Size and Markdown Availability.
 
diff --git a/src/scoring/resolutions.ts b/src/scoring/resolutions.ts
index 17b014b..a12cb45 100644
--- a/src/scoring/resolutions.ts
+++ b/src/scoring/resolutions.ts
@@ -178,7 +178,9 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       const tested = (d.testedPages as number) ?? 0;
       return (
         `${warnCount} of ${tested} pages convert to 50K-100K characters of ` +
-        'markdown. These may be truncated on some agent platforms.'
+        'markdown. Review pages for reducible boilerplate (navigation, ' +
+        'serialized tabbed content). Consider providing markdown versions ' +
+        'as a smaller alternative path for agents.'
       );
     },
     fail: (d) => {
@@ -186,8 +188,9 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       const tested = (d.testedPages as number) ?? 0;
       return (
         `${failCount} of ${tested} pages convert to over 100K characters of ` +
-        'markdown. Reduce inline CSS/JS, break large pages, or provide ' +
-        'markdown versions as a smaller alternative.'
+        'markdown. Break large pages into smaller units, reduce navigation ' +
+        'boilerplate, or provide markdown versions that bypass the HTML ' +
+        'conversion overhead.'
       );
     },
   },
@@ -198,9 +201,8 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       const tested = (d.testedPages as number) ?? 0;
       return (
         `${warnCount} of ${tested} pages have documentation content ` +
-        'starting 10-50% into the converted output. Inline CSS or ' +
-        "boilerplate consumes part of the agent's truncation budget " +
-        'before content begins.'
+        'starting 10-50% into the converted output. Reduce navigation, ' +
+        'breadcrumb, and sidebar markup that precedes the content area.'
       );
     },
     fail: (d) => {
@@ -209,7 +211,8 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       return (
         `${failCount} of ${tested} pages have content starting past 50% of ` +
         'the converted output. Agents may never see the documentation ' +
-        'content. Move or remove inline CSS/JS that precedes the content area.'
+        'content. Reduce navigation, breadcrumb, and sidebar markup that ' +
+        'precedes the content area.'
       );
     },
   },
@@ -293,7 +296,9 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       return (
         `Your llms.txt covers ${coverage}% of your site's pages ` +
         `(${warnThreshold}-${passThreshold}% is warn). ${missing} live ` +
-        'pages are not represented in the index.'
+        'pages are not represented in the index. Review missing pages ' +
+        'and add them, or adjust --coverage-pass-threshold/' +
+        '--coverage-warn-threshold if they are intentionally excluded.'
       );
     },
     fail: (d) => {
@@ -303,8 +308,10 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       return (
         `Your llms.txt covers ${coverage}% of your site's pages ` +
         `(below ${warnThreshold}% threshold). ` +
-        `${missing} live pages are missing from the index. Regenerate ` +
-        'llms.txt from your sitemap or build pipeline.'
+        `${missing} live pages are missing from the index. If ` +
+        'unintentional, regenerate llms.txt from your sitemap or build ' +
+        'pipeline. If intentional, lower the threshold or set it to 0 to ' +
+        'make the check informational.'
       );
     },
   },
@@ -339,7 +346,8 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
       const warnCount = (d.warnBucket as number) ?? 0;
       return (
         `${warnCount} endpoints have moderate cache lifetimes (1-24 hours). ` +
-        'Updates to llms.txt or markdown content may take hours to propagate.'
+        'Updates to llms.txt or markdown content may take hours to ' +
+        'propagate. Consider reducing cache lifetimes for these resources.'
       );
     },
     fail: (d) => {
@@ -361,7 +369,8 @@ const RESOLUTION_TEMPLATES: Record<string, ResolutionTemplate> = {
     fail: () =>
       'All or most documentation pages require authentication. Agents ' +
       'cannot access your documentation and will rely on potentially ' +
-      'outdated training data or secondary sources.',
+      'outdated training data or secondary sources. Consider providing ' +
+      'alternative access paths (see auth-alternative-access check).',
   },
 
   'auth-alternative-access': {

From da5831626a66ef106e44215c37062468e27ce430 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 26 Apr 2026 17:43:01 -0400
Subject: [PATCH 11/13] Add migration document for breaking changes, update
 stale references

---
 README.md                          |   2 +-
 SCORING.md                         |   4 +-
 docs/.vitepress/config.ts          |   4 +
 docs/generate_llms_txt             |  10 +-
 docs/migration/v0.17.0.md          | 223 +++++++++++++++++++++++++++++
 docs/public/llms.txt               |   6 +-
 docs/reference/programmatic-api.md |   2 +-
 scoring-reference.md               |   6 +-
 src/constants.ts                   |   3 +
 src/index.ts                       |   1 +
 10 files changed, 252 insertions(+), 9 deletions(-)
 create mode 100644 docs/migration/v0.17.0.md

diff --git a/README.md b/README.md
index 03f59f8..16402c6 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Powering [Agent Score](https://buildwithfern.com/agent-score) by Fern.
 
 > **Status: Early development (0.x)**
 > Check IDs, CLI flags, and output formats may change between minor versions.
-> Implements [spec v0.3.0](https://agentdocsspec.com/spec) (2026-03-31).
+> Implements [spec v0.5.0](https://agentdocsspec.com/spec) (2026-04-25).
 
 ## Quick start
 
diff --git a/SCORING.md b/SCORING.md
index f9523b5..3b31770 100644
--- a/SCORING.md
+++ b/SCORING.md
@@ -1,6 +1,6 @@
 # How the Agent-Friendly Docs Score Works
 
-Scoring Version: 0.1.0 · [Agent-Friendly Docs Spec v0.3.0](https://agentdocsspec.com) · March 2026
+Scoring Version: 0.1.0 · [Agent-Friendly Docs Spec v0.5.0](https://agentdocsspec.com) · April 2026
 
 ## What is this score?
 
@@ -23,7 +23,7 @@ The score reflects how well agents can _actually use_ your documentation, not ju
 
 ## What we check
 
-The 23 checks are grouped into seven categories. Each check is assigned a **weight tier** based on its observed impact on agent workflows:
+The 23 checks are grouped into seven categories. Each check is assigned a **weight tier** based on its observed impact on agent workflows (3 critical, 8 high, 10 medium, 2 low; max raw score 130):
 
 - **Critical (10 pts)**: Agents cannot function without this. Failure means zero content, zero navigation, or zero access.
 - **High (7 pts)**: Directly limits agent effectiveness. Failure means truncation, dead ends, or agents stuck on a worse path.
diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts
index 1d127d0..ff7a8af 100644
--- a/docs/.vitepress/config.ts
+++ b/docs/.vitepress/config.ts
@@ -105,6 +105,10 @@ export default defineConfig({
           { text: 'Config File', link: '/reference/config-file' },
         ],
       },
+      {
+        text: 'Migration',
+        items: [{ text: 'v0.17.0', link: '/migration/v0.17.0' }],
+      },
       {
         text: 'About',
         items: [{ text: 'About AFDocs', link: '/about' }],
diff --git a/docs/generate_llms_txt b/docs/generate_llms_txt
index 5a19db3..5f882cd 100755
--- a/docs/generate_llms_txt
+++ b/docs/generate_llms_txt
@@ -10,7 +10,7 @@ BASE_URL="https://afdocs.dev"
 cat > "$OUTPUT" << 'HEADER'
 # AFDocs
 
-> Test your documentation site against the Agent-Friendly Documentation Spec. AFDocs runs 22 checks across 7 categories to measure how well AI coding agents can discover, navigate, and consume your docs.
+> Test your documentation site against the Agent-Friendly Documentation Spec. AFDocs runs 23 checks across 7 categories to measure how well AI coding agents can discover, navigate, and consume your docs.
 
 ## The Score
 
@@ -61,6 +61,14 @@ echo "- [Config File](${BASE_URL}/reference/config-file.md): agent-docs.config.y
 
 cat >> "$OUTPUT" << 'SECTION'
 
+## Migration
+
+SECTION
+
+echo "- [Migrating to v0.17.0](${BASE_URL}/migration/v0.17.0.md)" >> "$OUTPUT"
+
+cat >> "$OUTPUT" << 'SECTION'
+
 ## About
 
 SECTION
diff --git a/docs/migration/v0.17.0.md b/docs/migration/v0.17.0.md
new file mode 100644
index 0000000..efcab89
--- /dev/null
+++ b/docs/migration/v0.17.0.md
@@ -0,0 +1,223 @@
+# Migrating to v0.17.0
+
+v0.17.0 updates AFDocs from the [Agent-Friendly Documentation Spec](https://agentdocsspec.com) v0.3.0 to v0.5.0. This is a breaking release. The sections below cover what changed and what you need to update.
+
+## What changed for CLI users
+
+If you run AFDocs from the command line and don't use the programmatic API, here's what you need to know.
+
+### Your scores may be different
+
+You may see score changes if you're using the `latest` version of the CLI, even if you don't change anything about how you run AFDocs. The scoring changes reflect refinements based on community conversations and on how agents actually interact with documentation:
+
+- **HTML page size and content start position scores may improve.** AFDocs now strips `<script>` and `<style>` elements from HTML before measuring page size, matching a change to how Claude Code processes pages. Sites with heavy inline JavaScript or CSS will see smaller measured sizes, which could improve your scores.
+
+- **Coverage scores (formerly freshness) may drop.** Some paths that were previously excluded from coverage calculations (`/changelog`, `/releases`, `/security`, `/status`) are no longer excluded by default. If your sitemap includes these paths but your llms.txt doesn't link to them, your coverage percentage will be lower. You can restore the old behavior with `--coverage-exclusions "/changelog/**,/releases/**,/security/**,/status/**"`.
+
+- **The directive check works differently.** The old `llms-txt-directive` check has been replaced by two checks: `llms-txt-directive-html` (checks HTML pages) and `llms-txt-directive-md` (checks markdown pages). The HTML check now ignores matches in navigation sidebars, which eliminates false positives that some sites were seeing.
+
+- **Runs that discover few pages now show N/A for some categories.** If AFDocs discovers fewer than 5 pages during automatic sampling, page-level categories (Page Size, Content Structure, URL Stability, etc.) show as N/A instead of producing scores based on too little data. Site-level checks (llms.txt, coverage, authentication) still score normally.
+
+### Check names that changed
+
+If you use `--check-ids` or `--skip-check-ids` to run specific checks, or if you have a [config file](/reference/config-file), two check names changed:
+
+- `llms-txt-freshness` is now `llms-txt-coverage`
+- `llms-txt-directive` is now `llms-txt-directive-html` and `llms-txt-directive-md`
+
+If you use old names in `--check-ids`, `--skip-check-ids`, or your config file, you'll get an error telling you the check ID is unknown. Update to the new names.
+
+### New diagnostics in your output
+
+The scorecard may now include these new diagnostics that weren't in previous versions:
+
+- **Markdown support is only partially discoverable**: Your site supports content negotiation, but most agents (which fetch HTML by default) have no signal to try the markdown path. Add a directive to your HTML pages.
+- **Single-page sample**: AFDocs couldn't discover enough pages to score reliably. Check that your llms.txt has working links, or provide pages with `--urls`.
+- **All llms.txt links are cross-origin**: Common when testing staging/preview deployments. Use `--canonical-origin` pointing to your public documentation site to fix.
+- **Gzipped sitemap skipped**: A `.gz` sitemap was found but couldn't be read. Provide an uncompressed sitemap alongside it.
+- **Severe rate limiting**: The site is returning 429 errors. Increase `--request-delay` to slow down.
+
+### New CLI flags
+
+You can now configure `llms-txt-coverage` and `markdown-content-parity` check thresholds from the command line:
+
+- `--coverage-pass-threshold`, `--coverage-warn-threshold`, `--coverage-exclusions`
+- `--parity-pass-threshold`, `--parity-warn-threshold`, `--parity-exclusions`
+
+No existing flags were removed or renamed. For details on these options, see [CLI Reference](/reference/cli).
+
+---
+
+The rest of this page covers changes relevant to developers building on the AFDocs programmatic API or parsing JSON output.
+
+## Check ID changes
+
+Two check IDs changed. If you reference check IDs anywhere (config files, `--check-ids` / `--skip-check-ids` flags, JSON output parsers, CI scripts), update them.
+
+| Old ID               | New ID(s)                                          | What happened                                                      |
+| -------------------- | -------------------------------------------------- | ------------------------------------------------------------------ |
+| `llms-txt-freshness` | `llms-txt-coverage`                                | Renamed. Same check, new name.                                     |
+| `llms-txt-directive` | `llms-txt-directive-html`, `llms-txt-directive-md` | Split into two checks: one for HTML pages, one for markdown pages. |
+
+The split means the total check count went from 22 to 23, and the max raw score changed from 126 to 130. `llms-txt-directive-html` kept the original's High/7 weight. `llms-txt-directive-md` is Medium/4.
+
+## Config file changes
+
+If your `.afdocs.yml` or `.afdocs.yaml` references old check IDs in `checkIds`, `skipCheckIds`, or URL-specific overrides, update them to the new IDs. Unknown check IDs now produce errors instead of being silently ignored.
+
+New optional config fields are available:
+
+```yaml
+# To configure 'llms-txt-coverage', you can define your own minimum coverage thresholds
+coveragePassThreshold: 95 # 0-100, default 95 (higher = stricter)
+coverageWarnThreshold: 80 # 0-100, default 80
+coverageExclusions: # glob patterns excluded from sitemap denominator
+  - '/api/internal/**'
+
+# To configure 'markdown-content-parity', you can define your own maximum disparity thresholds
+parityPassThreshold: 5 # 0-100, default 5 (lower = stricter)
+parityWarnThreshold: 20 # 0-100, default 20
+parityExclusions: # CSS selectors stripped from HTML before comparison
+  - '[data-markdown-ignore]'
+```
+
+For more details about configuration, including turning these into informational-only checks, refer to the check documentation.
+
+**YAML quoting note:** Selectors like `[data-markdown-ignore]` must be quoted in YAML. Without quotes, YAML parses brackets as a nested array. v0.17.0 validates this at config load time and produces a clear error.
+
+## CLI changes
+
+New flags for the coverage and parity checks:
+
+- `--coverage-pass-threshold`, `--coverage-warn-threshold`, `--coverage-exclusions`
+- `--parity-pass-threshold`, `--parity-warn-threshold`, `--parity-exclusions`
+
+No existing flags were removed or renamed.
+
+## Score changes you may notice
+
+Even without changing your configuration, you may see different scores after upgrading:
+
+- **`page-size-html` and `content-start-position`** scores may improve. v0.17.0 strips `<script>` and `<style>` elements from HTML before conversion, matching how agents (including Claude Code) actually process pages. Sites with heavy inline JavaScript or CSS will see smaller post-conversion sizes and earlier content start positions, which may improve scoring.
+
+- **`llms-txt-coverage`** scores may drop. The built-in exclusion list was trimmed: `/changelog`, `/releases`, `/security`, and `/status` paths are no longer automatically excluded from the sitemap denominator. If your sitemap includes these paths but your llms.txt does not, coverage percentages will be lower. If you intentionally exclude URLs that include these paths from your llms.txt, you can restore the old behavior with `--coverage-exclusions "/changelog/**,/releases/**,/security/**,/status/**"`.
+
+- **`llms-txt-directive-html`** may produce different results than the old `llms-txt-directive`. The new check strips `<nav>`, `<script>`, and `<style>` elements from the HTML body before searching for directives. The old logic could produce false positives: a page could pass merely because it mentioned llms.txt in one of these areas, as happens on platform documentation sites that document generating llms.txt files.
+
+- **Low page count runs** now produce N/A scores for page-level categories. When automatic discovery (`random` or `deterministic` sampling) finds fewer than 5 pages, page-level checks get `scoreDisplayMode: "notApplicable"` and are excluded from the overall score. This prevents a handful of pages from producing unreliable category scores. Site-level checks (llms.txt checks, coverage, auth-alternative-access) continue to score normally.
+
+## Programmatic API changes
+
+### Input validation
+
+`createContext()` and `runChecks()` now validate options and throw `Error` on invalid input. Previously, invalid values (NaN, negative concurrency, out-of-range thresholds) were silently accepted. If you pass hardcoded valid options, this won't affect you. If you pass user-controlled values, add try/catch or call the new `validateRunnerOptions()` for pre-flight validation:
+
+```typescript
+import { validateRunnerOptions } from 'afdocs';
+
+const result = validateRunnerOptions(options);
+if (!result.valid) {
+  // result.errors: Array<{ field: string; message: string }>
+  // result.warnings: Array<{ field: string; message: string }>
+}
+```
+
+### Type changes
+
+**`CheckScore`** has a new required field:
+
+```typescript
+interface CheckScore {
+  // ... existing fields ...
+  scoreDisplayMode: 'numeric' | 'notApplicable';
+}
+```
+
+**`CategoryScore`** fields are now nullable:
+
+```typescript
+interface CategoryScore {
+  score: number | null; // was: number
+  grade: Grade | null; // was: Grade
+}
+```
+
+A `null` score means all checks in that category were `notApplicable` (insufficient data).
+
+**`ReportResult`** has two new optional fields:
+
+```typescript
+interface ReportResult {
+  // ... existing fields ...
+  testedPages?: number;
+  samplingStrategy?: SamplingStrategy;
+}
+```
+
+### `evaluateDiagnostics()` signature change
+
+The function now requires the full `ReportResult` as a second argument:
+
+```typescript
+// Before
+evaluateDiagnostics(resultsMap);
+
+// After
+evaluateDiagnostics(resultsMap, report);
+```
+
+### New exports
+
+```typescript
+import {
+  validateRunnerOptions, // pre-flight option validation
+  VALID_SAMPLING_STRATEGIES, // ['random', 'deterministic', 'curated', 'none']
+  SPEC_VERSION, // 'v0.5.0'
+} from 'afdocs';
+
+import type { ValidationResult, ValidationIssue, SamplingStrategy, ScoreDisplayMode } from 'afdocs';
+```
+
+### `CheckOptions` new fields
+
+All optional with defaults matching previous behavior:
+
+```typescript
+interface CheckOptions {
+  // ... existing fields ...
+  coveragePassThreshold?: number; // default 95
+  coverageWarnThreshold?: number; // default 80
+  coverageExclusions?: string[];
+  parityPassThreshold?: number; // default 5
+  parityWarnThreshold?: number; // default 20
+  parityExclusions?: string[];
+}
+```
+
+### URL normalization
+
+`canonicalOrigin` and `llmsTxtUrl` options are now normalized by `createContext()` (scheme prepended if missing), matching existing `baseUrl` behavior. Bare domains like `example.com` now work where they previously may have caused issues.
+
+## JSON output changes
+
+If you parse JSON output from the CLI or programmatic API:
+
+- Check IDs `llms-txt-freshness` and `llms-txt-directive` no longer appear. Look for `llms-txt-coverage`, `llms-txt-directive-html`, and `llms-txt-directive-md` instead.
+- `llms-txt-coverage` details no longer include `thresholdWarnings`. Threshold validation now happens before checks run.
+- `llms-txt-coverage` details have new optional fields: `coveragePassThreshold`, `coverageWarnThreshold`, `userExcludedPages`, `omittedSubtrees`, `omittedSubtreePages`.
+- `llms-txt-links-resolve` details `crossOrigin` object has a new optional field: `dominantOrigin`.
+- `markdown-content-parity` details have new optional fields: `segmentationElementsStripped`, `parityPassThreshold`, `parityWarnThreshold`.
+- `markdown-content-parity` no longer returns `status: 'error'` for inverted thresholds. Invalid thresholds are caught at startup.
+- Resolution text changed for several checks. If you match on resolution strings, update your patterns.
+
+## Diagnostic changes
+
+New diagnostic IDs that may appear in the `diagnostics` array:
+
+- `markdown-partially-discoverable` (warning): fires when content negotiation works but there's no HTML directive pointing to llms.txt.
+- `single-page-sample` (warning): fires when fewer than 5 pages were discovered.
+- `cross-origin-llms-txt` (warning): fires when all llms.txt links point to a different origin.
+- `gzipped-sitemap-skipped` (info): fires when a `.gz` sitemap was encountered and skipped.
+- `rate-limiting-severe` (warning): fires when more than 20% of URLs returned 429.
+
+**Changed trigger:** `markdown-undiscoverable` now fires when `markdown-url-support` passes but neither `llms-txt-directive-html` nor `content-negotiation` passes. Previously it required all three of content-negotiation, the old combined directive check, and llms-txt-links-markdown to fail. Sites where content negotiation passes but the HTML directive is missing will now see `markdown-partially-discoverable` instead.
diff --git a/docs/public/llms.txt b/docs/public/llms.txt
index 181ad1c..d2434a4 100644
--- a/docs/public/llms.txt
+++ b/docs/public/llms.txt
@@ -1,6 +1,6 @@
 # AFDocs
 
-> Test your documentation site against the Agent-Friendly Documentation Spec. AFDocs runs 22 checks across 7 categories to measure how well AI coding agents can discover, navigate, and consume your docs.
+> Test your documentation site against the Agent-Friendly Documentation Spec. AFDocs runs 23 checks across 7 categories to measure how well AI coding agents can discover, navigate, and consume your docs.
 
 ## The Score
 
@@ -35,6 +35,10 @@
 - [Scoring API](https://afdocs.dev/reference/scoring-api.md): computing scores from check results
 - [Config File](https://afdocs.dev/reference/config-file.md): agent-docs.config.yml format and options
 
+## Migration
+
+- [Migrating to v0.17.0](https://afdocs.dev/migration/v0.17.0.md)
+
 ## About
 
 - [About AFDocs](https://afdocs.dev/about.md)
diff --git a/docs/reference/programmatic-api.md b/docs/reference/programmatic-api.md
index 93c01fd..8769ee5 100644
--- a/docs/reference/programmatic-api.md
+++ b/docs/reference/programmatic-api.md
@@ -10,7 +10,7 @@ import { runChecks } from 'afdocs';
 const report = await runChecks('https://docs.example.com');
 
 console.log(report.summary);
-// { total: 22, pass: 15, warn: 3, fail: 2, skip: 2, error: 0 }
+// { total: 23, pass: 15, warn: 4, fail: 2, skip: 2, error: 0 }
 
 for (const result of report.results) {
   console.log(`${result.id}: ${result.status} — ${result.message}`);
diff --git a/scoring-reference.md b/scoring-reference.md
index ec11164..407c3a0 100644
--- a/scoring-reference.md
+++ b/scoring-reference.md
@@ -1,9 +1,9 @@
 # Scoring Implementation Reference: Agent-Friendly Docs Scorecard
 
 Scoring Version: 0.1.0
-Agent-Friendly Docs Spec Version: v0.3.0
+Agent-Friendly Docs Spec Version: v0.5.0
 Spec URL: https://agentdocsspec.com
-Date: 03/31/2026
+Date: 04/25/2026
 
 ## Goals
 
@@ -69,7 +69,7 @@ and the empirical evidence sections in each check definition.
 | `section-header-quality`       | Low      | 2      | Refinement for tabbed content; only matters when tabs exist.                                                                                          |
 | `cache-header-hygiene`         | Low      | 2      | Aggressive caching rarely causes acute agent failures.                                                                                                |
 
-**Maximum raw score**: 3(10) + 8(7) + 9(4) + 2(2) = 30 + 56 + 36 + 4 = **126 points**
+**Maximum raw score**: 3(10) + 8(7) + 10(4) + 2(2) = 30 + 56 + 40 + 4 = **130 points**
 
 ---
 
diff --git a/src/constants.ts b/src/constants.ts
index 9205f20..3ed7d03 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -58,3 +58,6 @@ export const MIN_PAGES_FOR_SCORING = 5;
 
 /** Base URL for the Agent-Friendly Documentation Spec. */
 export const SPEC_BASE_URL = 'https://agentdocsspec.com/spec/';
+
+/** Version of the Agent-Friendly Documentation Spec implemented by this release. */
+export const SPEC_VERSION = 'v0.5.0';
diff --git a/src/index.ts b/src/index.ts
index 4c22207..1ca9576 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -21,6 +21,7 @@ export {
   DEFAULT_THRESHOLDS,
   CATEGORIES,
   VALID_SAMPLING_STRATEGIES,
+  SPEC_VERSION,
 } from './constants.js';
 export { validateRunnerOptions } from './validation.js';
 export type { ValidationResult, ValidationIssue } from './validation.js';

From 50a387dd487e507ec67307de4a727c2d33a8b742 Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 26 Apr 2026 18:13:01 -0400
Subject: [PATCH 12/13] Tighten overly broad llms.txt directive returning false
 positives

---
 eslint.config.js                              |  9 +++++++-
 .../llms-txt-directive-html.ts                |  8 ++++---
 .../llms-txt-directive-md.ts                  |  6 +++++-
 .../checks/llms-txt-directive-html.test.ts    | 21 +++++++++++++++++--
 .../unit/checks/llms-txt-directive-md.test.ts | 21 +++++++++++++++++--
 5 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/eslint.config.js b/eslint.config.js
index 82cb351..bc3b275 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -15,6 +15,13 @@ export default tseslint.config(
     },
   },
   {
-    ignores: ['dist/', 'coverage/', 'node_modules/', 'bin/', 'docs/.vitepress/dist/'],
+    ignores: [
+      'dist/',
+      'coverage/',
+      'node_modules/',
+      'bin/',
+      'docs/.vitepress/dist/',
+      'docs/.vitepress/cache/',
+    ],
   },
 );
diff --git a/src/checks/content-discoverability/llms-txt-directive-html.ts b/src/checks/content-discoverability/llms-txt-directive-html.ts
index c3aa701..973e632 100644
--- a/src/checks/content-discoverability/llms-txt-directive-html.ts
+++ b/src/checks/content-discoverability/llms-txt-directive-html.ts
@@ -20,10 +20,12 @@ const LINK_PATTERN =
   /<a\s[^>]*href\s*=\s*["']([^"']*\/llms\.txt(?:[?#][^"']*)?)["'][^>]*>[\s\S]*?<\/a>/gi;
 
 /**
- * Plain text mentions of "llms.txt" in body content (after nav/script/style
- * are stripped). Catches visually-hidden directives and text-only directives.
+ * Path-like references to an llms.txt file in body content (after
+ * nav/script/style are stripped). Requires a leading slash to distinguish
+ * actual directives (e.g. "See /llms.txt") from documentation prose that
+ * merely discusses the llms.txt concept (e.g. "Create an llms.txt file").
  */
-const TEXT_PATTERN = /llms\.txt/gi;
+const TEXT_PATTERN = /\/llms\.txt/gi;
 
 const TOP_THRESHOLD = 0.1;
 const DEEP_THRESHOLD = 0.5;
diff --git a/src/checks/content-discoverability/llms-txt-directive-md.ts b/src/checks/content-discoverability/llms-txt-directive-md.ts
index d7b59ac..94243d2 100644
--- a/src/checks/content-discoverability/llms-txt-directive-md.ts
+++ b/src/checks/content-discoverability/llms-txt-directive-md.ts
@@ -15,7 +15,11 @@ interface DirectiveResult {
   error?: string;
 }
 
-const DIRECTIVE_PATTERN = /llms\.txt/gi;
+/**
+ * Path-like references to an llms.txt file. Requires a leading slash to
+ * distinguish actual directives from documentation prose about the concept.
+ */
+const DIRECTIVE_PATTERN = /\/llms\.txt/gi;
 
 const TOP_THRESHOLD = 0.1;
 const DEEP_THRESHOLD = 0.5;
diff --git a/test/unit/checks/llms-txt-directive-html.test.ts b/test/unit/checks/llms-txt-directive-html.test.ts
index d9085af..35e94cf 100644
--- a/test/unit/checks/llms-txt-directive-html.test.ts
+++ b/test/unit/checks/llms-txt-directive-html.test.ts
@@ -356,13 +356,13 @@ describe('llms-txt-directive-html', () => {
     expect(result.message).not.toContain('buried');
   });
 
-  it('detects text mention of llms.txt in content area (outside nav)', async () => {
+  it('detects text mention of /llms.txt path in content area (outside nav)', async () => {
     server.use(
       http.get(
         'http://test.local/docs/page1',
         () =>
           new HttpResponse(
-            '<html><body><p>See our llms.txt for a full documentation index.</p><h1>Docs</h1><p>Content...</p></body></html>',
+            '<html><body><p>See /llms.txt for a full documentation index.</p><h1>Docs</h1><p>Content...</p></body></html>',
             { status: 200, headers: { 'Content-Type': 'text/html' } },
           ),
       ),
@@ -372,4 +372,21 @@ describe('llms-txt-directive-html', () => {
     expect(result.status).toBe('pass');
     expect(result.details?.foundCount).toBe(1);
   });
+
+  it('ignores bare "llms.txt" text without path context (documentation prose)', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1',
+        () =>
+          new HttpResponse(
+            '<html><body><p>Create an llms.txt file to help agents discover your docs.</p><h1>Docs</h1><p>Content...</p></body></html>',
+            { status: 200, headers: { 'Content-Type': 'text/html' } },
+          ),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.foundCount).toBe(0);
+  });
 });
diff --git a/test/unit/checks/llms-txt-directive-md.test.ts b/test/unit/checks/llms-txt-directive-md.test.ts
index 8d975b3..e0d3f7c 100644
--- a/test/unit/checks/llms-txt-directive-md.test.ts
+++ b/test/unit/checks/llms-txt-directive-md.test.ts
@@ -152,7 +152,7 @@ describe('llms-txt-directive-md', () => {
       http.get(
         'http://test.local/docs/page1.md',
         () =>
-          new HttpResponse(`# Docs\n\n${padding}> See llms.txt for the index.\n`, {
+          new HttpResponse(`# Docs\n\n${padding}> See /llms.txt for the index.\n`, {
             status: 200,
             headers: { 'Content-Type': 'text/markdown' },
           }),
@@ -313,7 +313,7 @@ describe('llms-txt-directive-md', () => {
       http.get(
         'http://test.local/docs/page1.md',
         () =>
-          new HttpResponse(`# Docs\n\n${before}See llms.txt for index.\n\n${after}`, {
+          new HttpResponse(`# Docs\n\n${before}See /llms.txt for index.\n\n${after}`, {
             status: 200,
             headers: { 'Content-Type': 'text/markdown' },
           }),
@@ -326,6 +326,23 @@ describe('llms-txt-directive-md', () => {
     expect(result.message).not.toContain('buried');
   });
 
+  it('ignores bare "llms.txt" text without path context (documentation prose)', async () => {
+    server.use(
+      http.get(
+        'http://test.local/docs/page1.md',
+        () =>
+          new HttpResponse('# About llms.txt\n\nCreate an llms.txt file to help agents.', {
+            status: 200,
+            headers: { 'Content-Type': 'text/markdown' },
+          }),
+      ),
+    );
+
+    const result = await check.run(makeCtx(llms('/docs/page1')));
+    expect(result.status).toBe('fail');
+    expect(result.details?.foundCount).toBe(0);
+  });
+
   it('handles curated .md pages', async () => {
     server.use(
       http.get(

From e315a349d0e99d1a1acac6bb1e6969a4fbec8b5f Mon Sep 17 00:00:00 2001
From: dacharyc <dc@dacharycarey.com>
Date: Sun, 26 Apr 2026 19:03:49 -0400
Subject: [PATCH 13/13] Note likely scoring changes in migration doc

---
 docs/migration/v0.17.0.md | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/docs/migration/v0.17.0.md b/docs/migration/v0.17.0.md
index efcab89..89a2e08 100644
--- a/docs/migration/v0.17.0.md
+++ b/docs/migration/v0.17.0.md
@@ -8,13 +8,15 @@ If you run AFDocs from the command line and don't use the programmatic API, here
 
 ### Your scores may be different
 
-You may see score changes if you're using the `latest` version of the CLI, even if you don't change anything about how you run AFDocs. The scoring changes reflect refinements from community conversations and based on how agents actually interact with documentation:
+You may see score changes if you're using the `latest` version of the CLI, even if you don't change anything about how you run AFDocs. In smoke testing across sites spanning the A through F grade range, we saw overall score changes between -10 and +8 points. Most sites changed by ±5 points or fewer, and grade-letter changes were uncommon. The scoring changes reflect refinements based on community conversations and additional testing of how agents actually interact with documentation:
 
-- **HTML page size and content start position scores may improve.** AFDocs now strips `<script>` and `<style>` elements from HTML before measuring page size, matching a change to how Claude Code processes pages. Sites with heavy inline JavaScript or CSS will see smaller measured sizes, which could improve your scores.
+- **Content discoverability scores will likely drop.** The directive check split (see below) and coverage changes combine to lower this category for most sites. In testing, every site we checked saw a content-discoverability drop, ranging from 3 to 14 points.
+
+- **The directive check now scores HTML and markdown separately.** The old `llms-txt-directive` check has been replaced by two checks: `llms-txt-directive-html` (checks HTML pages) and `llms-txt-directive-md` (checks markdown pages). Most sites have the directive in one format but not the other. A site that previously scored "warn" on the combined check may now get a pass on one and a fail on the other. For example, a site that embeds the directive only in its HTML pages will pass `llms-txt-directive-html` but fail `llms-txt-directive-md`. The HTML check also now ignores matches in navigation sidebars, which eliminates false positives that some sites were seeing.
 
-- **Coverage scores (formerly freshness) may drop.** Some paths that were previously excluded from coverage calculations (`/changelog`, `/releases`, `/security`, `/status`) are no longer excluded by default. If your sitemap includes these paths but your llms.txt doesn't link to them, your coverage percentage will be lower. You can restore the old behavior with `--coverage-exclusions "/changelog/**,/releases/**,/security/**,/status/**"`.
+- **HTML page size and content start position scores may improve.** AFDocs now strips `<script>` and `<style>` elements from HTML before measuring page size, matching a change to how Claude Code processes pages. Sites with heavy inline JavaScript or CSS will see smaller measured sizes, which could improve your scores.
 
-- **The directive check works differently.** The old `llms-txt-directive` check has been replaced by two checks: `llms-txt-directive-html` (checks HTML pages) and `llms-txt-directive-md` (checks markdown pages). The HTML check now ignores matches in navigation sidebars, which eliminates false positives that some sites were seeing.
+- **Coverage scores may change in either direction.** Some paths that were previously excluded from llms.txt coverage calculations (`/changelog`, `/releases`, `/security`, `/status`) are no longer excluded by default. If your sitemap includes these paths but your llms.txt doesn't link to them, your coverage percentage will be lower. You can restore the old behavior with `--coverage-exclusions "/changelog/**,/releases/**,/security/**,/status/**"`. On the other hand, sites that use nested llms.txt indexes (where a top-level llms.txt links to section-level llms.txt files) may see coverage _improve_, because the check now recognizes subtrees covered by nested indexes instead of counting every individual page against the denominator.
 
 - **Runs that discover few pages now show N/A for some categories.** If AFDocs discovers fewer than 5 pages during automatic sampling, page-level categories (Page Size, Content Structure, URL Stability, etc.) show as N/A instead of producing scores based on too little data. Site-level checks (llms.txt, coverage, authentication) still score normally.
 
@@ -96,15 +98,31 @@ No existing flags were removed or renamed.
 
 ## Score changes you may notice
 
-Even without changing your configuration, you may see different scores after upgrading:
+Even without changing your configuration, you may see different scores after upgrading. In smoke testing across sites at every grade level, overall scores changed by -10 to +8 points. Most sites moved by ±5 points or fewer, and grade-letter changes were uncommon (1 downgrade and 1 upgrade out of 6 tested sites). Category-level changes can be larger than the overall change because gains in one category can offset losses in another.
+
+### Content discoverability will likely drop
+
+The directive check split is the primary driver. The old `llms-txt-directive` (weight 7) has been replaced by `llms-txt-directive-html` (weight 7) and `llms-txt-directive-md` (weight 4). Most sites have the llms.txt directive in one format but not the other, so the common outcome is one pass and one fail where the old check was a single warn. For example, a site that embeds a `<!-- For AI agents: ... -->` comment in its HTML templates will pass the HTML check but fail the markdown check (since HTML comments don't appear in the markdown output). Conversely, a site that prepends a directive line to its markdown content will pass the markdown check but fail the HTML check if that line doesn't also appear in the rendered HTML.
+
+In testing, content-discoverability dropped for every site we checked (between 3 and 14 points). To improve this category, add the directive in both HTML and markdown formats.
+
+### `llms-txt-directive-html` may produce different results than the old `llms-txt-directive`
+
+Beyond the split, the HTML check now strips `<nav>`, `<script>`, and `<style>` elements from the HTML body before searching for directives. The old logic may have produced false positives for pages that passed due to mentioning llms.txt in navigation sidebars or in documentation about generating llms.txt files.
+
+### `page-size-html` and `content-start-position` scores may improve
+
+v0.17.0 strips `<script>` and `<style>` elements from HTML before conversion, matching how agents (including Claude Code) actually process pages. Sites with heavy inline JavaScript or CSS will see smaller post-conversion sizes and earlier content start positions, which may improve scoring.
+
+### `llms-txt-coverage` scores may change in either direction
 
-- **`page-size-html` and `content-start-position`** scores may improve. v0.17.0 strips `<script>` and `<style>` elements from HTML before conversion, matching how agents (including Claude Code) actually process pages. Sites with heavy inline JavaScript or CSS will see smaller post-conversion sizes and earlier content start positions, which may improve scoring.
+Coverage scores may **drop** because the built-in exclusion list was trimmed: `/changelog`, `/releases`, `/security`, and `/status` paths are no longer automatically excluded from the sitemap denominator. If your sitemap includes these paths but your llms.txt does not, coverage percentages will be lower. If you intentionally exclude URLs that include these paths from your llms.txt, you can restore the old behavior with `--coverage-exclusions "/changelog/**,/releases/**,/security/**,/status/**"`.
 
-- **`llms-txt-coverage`** scores may drop. The built-in exclusion list was trimmed: `/changelog`, `/releases`, `/security`, and `/status` paths are no longer automatically excluded from the sitemap denominator. If your sitemap includes these paths but your llms.txt does not, coverage percentages will be lower. If you intentionally exclude URLs that include these paths from your llms.txt, you can restore the old behavior with `--coverage-exclusions "/changelog/**,/releases/**,/security/**,/status/**"`.
+Coverage scores may **improve** if your llms.txt uses nested indexes (where the top-level file links to section-level llms.txt files like `/docs/guides/llms.txt`). The coverage check now recognizes these subtrees and counts them as covered, rather than requiring every individual page to appear in the top-level file. One site in testing went from 30% coverage (fail) to 100% (pass) after this change.
 
-- **`llms-txt-directive-html`** may produce different results than the old `llms-txt-directive`. The new check strips `<nav>`, `<script>`, and `<style>` elements from the HTML body before searching for directives. The old logic may have produced false positives for pages that formerly passed due to mentioning llms.txt in these areas, such as in platform documentation sites that document generating llms.txt files.
+### Low page count runs now produce N/A scores
 
-- **Low page count runs** now produce N/A scores for page-level categories. When automatic discovery (`random` or `deterministic` sampling) finds fewer than 5 pages, page-level checks get `scoreDisplayMode: "notApplicable"` and are excluded from the overall score. This prevents a handful of pages from producing unreliable category scores. Site-level checks (llms.txt checks, coverage, auth-alternative-access) continue to score normally.
+When automatic discovery (`random` or `deterministic` sampling) finds fewer than 5 pages, page-level checks get `scoreDisplayMode: "notApplicable"` and are excluded from the overall score. This prevents a handful of pages from producing unreliable category scores. Site-level checks (llms.txt checks, coverage, auth-alternative-access) continue to score normally.
 
 ## Programmatic API changes