From b046a4ab53ff333d08de43c855e9ca8baf893501 Mon Sep 17 00:00:00 2001
From: IISweetHeartII
Date: Thu, 19 Feb 2026 23:45:46 +0900
Subject: [PATCH] feat: implement Phase 1 gatherers, audits, and runner pipeline (#38)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add complete Gather → Audit → Score pipeline:

Gatherers (3):
- HttpGatherer: parallel fetch of 6 well-known files
- HtmlGatherer: regex-based extraction of JSON-LD, meta tags, semantic HTML
- ApiGatherer: OpenAPI spec analysis, header/auth/rate-limit detection

Audits (19):
- Discovery: llms-txt, openapi-spec, robots-ai, ai-plugin, schema-org
- API Quality: openapi-valid, response-format, response-examples, content-negotiation
- Structured Data: json-ld, meta-tags, semantic-html
- Auth & Onboarding: self-service-auth, no-captcha
- Error Handling: error-codes, rate-limit-headers, retry-after
- Documentation: machine-readable-docs, sdk-available

Runner: orchestrates two-pass gathering (HTTP first, then derived HTML+API),
executes all audits with error resilience, and feeds results into the
existing scoring engine.

Co-Authored-By: Claude Opus 4.6 --- src/audits/ai-plugin.ts | 75 ++++++++++++++ src/audits/content-negotiation.ts | 68 +++++++++++++ src/audits/error-codes.ts | 43 ++++++++ src/audits/index.ts | 31 ++++++ src/audits/json-ld.ts | 56 +++++++++++ src/audits/llms-txt.ts | 49 +++++++++ src/audits/machine-readable-docs.ts | 44 ++++++++ src/audits/meta-tags.ts | 69 +++++++++++++ src/audits/no-captcha.ts | 42 ++++++++ src/audits/openapi-spec.ts | 59 +++++++++++ src/audits/openapi-valid.ts | 85 ++++++++++++++++ src/audits/rate-limit-headers.ts | 49 +++++++++ src/audits/response-examples.ts | 49 +++++++++ src/audits/response-format.ts | 41 ++++++++ src/audits/retry-after.ts | 43 ++++++++ src/audits/robots-ai.ts | 124 +++++++++++++++++++++++ src/audits/schema-org.ts | 63 ++++++++++++ src/audits/sdk-available.ts | 43 ++++++++ src/audits/self-service-auth.ts | 42 ++++++++ src/audits/semantic-html.ts | 61 +++++++++++ src/gatherers/api-gatherer.ts | 150 ++++++++++++++++++++++++++++ src/gatherers/base-gatherer.ts | 8 +- src/gatherers/html-gatherer.ts | 124 +++++++++++++++++++++++ src/gatherers/http-gatherer.ts | 113 +++++++++++++++++++++ src/gatherers/index.ts | 9 ++ src/index.ts | 26 ++++- src/runner.ts | 110 +++++++++++++++++++- 27 files changed, 1670 insertions(+), 6 deletions(-) create mode 100644 src/audits/ai-plugin.ts create mode 100644 src/audits/content-negotiation.ts create mode 100644 src/audits/error-codes.ts create mode 100644 src/audits/json-ld.ts create mode 100644 src/audits/llms-txt.ts create mode 100644 src/audits/machine-readable-docs.ts create mode 100644 src/audits/meta-tags.ts create mode 100644 src/audits/no-captcha.ts create mode 100644 src/audits/openapi-spec.ts create mode 100644 src/audits/openapi-valid.ts create mode 100644 src/audits/rate-limit-headers.ts create mode 100644 src/audits/response-examples.ts create mode 100644 src/audits/response-format.ts create mode 100644 src/audits/retry-after.ts create mode 100644 src/audits/robots-ai.ts create mode 
100644 src/audits/schema-org.ts create mode 100644 src/audits/sdk-available.ts create mode 100644 src/audits/self-service-auth.ts create mode 100644 src/audits/semantic-html.ts create mode 100644 src/gatherers/api-gatherer.ts create mode 100644 src/gatherers/html-gatherer.ts create mode 100644 src/gatherers/http-gatherer.ts diff --git a/src/audits/ai-plugin.ts b/src/audits/ai-plugin.ts new file mode 100644 index 0000000..78fbf8f --- /dev/null +++ b/src/audits/ai-plugin.ts @@ -0,0 +1,75 @@ +import type { AuditResult } from '../types.js'; +import type { GatherResult } from '../gatherers/base-gatherer.js'; +import type { HttpGatherResult } from '../gatherers/http-gatherer.js'; +import { BaseAudit, type AuditMeta } from './base-audit.js'; + +/** + * Checks whether the site exposes an AI plugin manifest at + * `/.well-known/ai-plugin.json`. + * + * The ai-plugin.json manifest is the OpenAI ChatGPT plugin standard + * and enables AI platforms to discover and integrate with the site. + */ +export class AiPluginAudit extends BaseAudit { + meta: AuditMeta = { + id: 'ai-plugin', + title: 'Site provides an ai-plugin.json manifest', + failureTitle: 'Site does not provide an ai-plugin.json manifest', + description: + 'An ai-plugin.json manifest at /.well-known/ai-plugin.json enables AI platforms ' + + '(such as ChatGPT plugins) to discover and interact with the site.', + requiredGatherers: ['http'], + scoreDisplayMode: 'binary', + }; + + async audit(artifacts: Record): Promise { + const http = artifacts['http'] as HttpGatherResult; + const aiPlugin = http.aiPlugin; + + if (!aiPlugin.found) { + return this.fail({ + type: 'text', + summary: 'No /.well-known/ai-plugin.json file was found at the target URL.', + }); + } + + const content = (aiPlugin.content ?? 
'').trim(); + + if (content.length === 0) { + return this.fail({ + type: 'text', + summary: 'An ai-plugin.json file was found but it is empty.', + }); + } + + // Validate that it is parseable JSON + try { + const manifest = JSON.parse(content) as Record; + + // Basic structural validation: check for expected fields + const hasRequiredFields = + typeof manifest.schema_version === 'string' && + typeof manifest.name_for_human === 'string' && + typeof manifest.name_for_model === 'string'; + + if (!hasRequiredFields) { + return this.partial(0.5, { + type: 'text', + summary: + 'An ai-plugin.json file was found with valid JSON, but it is missing ' + + 'expected fields (schema_version, name_for_human, name_for_model).', + }); + } + + return this.pass({ + type: 'text', + summary: `Found a valid ai-plugin.json manifest for "${String(manifest.name_for_human)}".`, + }); + } catch { + return this.fail({ + type: 'text', + summary: 'An ai-plugin.json file was found but it contains invalid JSON.', + }); + } + } +} diff --git a/src/audits/content-negotiation.ts b/src/audits/content-negotiation.ts new file mode 100644 index 0000000..2c01bcb --- /dev/null +++ b/src/audits/content-negotiation.ts @@ -0,0 +1,68 @@ +import type { AuditResult } from '../types.js'; +import type { GatherResult } from '../gatherers/base-gatherer.js'; +import type { HttpGatherResult } from '../gatherers/http-gatherer.js'; +import { BaseAudit, type AuditMeta } from './base-audit.js'; + +/** + * Checks whether the site returns proper content-type headers and + * supports JSON-based content negotiation. + * + * Proper content-type headers and accept-header support enable AI agents + * to negotiate the most suitable response format. 
+ */ +export class ContentNegotiationAudit extends BaseAudit { + meta: AuditMeta = { + id: 'content-negotiation', + title: 'Site supports proper content negotiation', + failureTitle: 'Site does not support proper content negotiation', + description: + 'Proper content-type headers and support for JSON content negotiation ' + + 'enable AI agents to request and receive data in the most suitable format.', + requiredGatherers: ['http'], + scoreDisplayMode: 'binary', + }; + + async audit(artifacts: Record): Promise { + const http = artifacts['http'] as HttpGatherResult; + const headers = http.headers; + + const contentType = headers['content-type'] ?? ''; + + // Check that a content-type header is present at all + if (!contentType) { + return this.fail({ + type: 'text', + summary: + 'The site does not return a content-type header. ' + + 'AI agents need content-type headers to correctly parse responses.', + }); + } + + // Check for JSON support indicators + const hasJsonSupport = + contentType.includes('application/json') || + headers['accept']?.includes('application/json') || + // Vary: Accept header indicates the server performs content negotiation + (headers['vary'] ?? '').toLowerCase().includes('accept'); + + if (hasJsonSupport) { + return this.pass({ + type: 'table', + summary: 'The site returns proper content-type headers and supports JSON.', + items: [ + { header: 'content-type', value: contentType }, + ...(headers['vary'] ? [{ header: 'vary', value: headers['vary'] }] : []), + ], + }); + } + + // The site has a content-type header but no JSON support signals + return this.fail({ + type: 'table', + summary: + 'The site returns a content-type header but does not indicate JSON support. 
' + + 'Consider supporting application/json for AI agent consumption.', + items: [{ header: 'content-type', value: contentType }], + }); + } +} diff --git a/src/audits/error-codes.ts b/src/audits/error-codes.ts new file mode 100644 index 0000000..7e79892 --- /dev/null +++ b/src/audits/error-codes.ts @@ -0,0 +1,43 @@ +import type { AuditResult } from '../types.js'; +import type { GatherResult } from '../gatherers/base-gatherer.js'; +import type { ApiGatherResult } from '../gatherers/api-gatherer.js'; +import { BaseAudit, type AuditMeta } from './base-audit.js'; + +/** + * Checks if the API uses structured error codes. + * Structured error responses (with code, message, and error fields) help AI agents + * programmatically handle failures and implement retry/recovery logic. + */ +export class ErrorCodesAudit extends BaseAudit { + meta: AuditMeta = { + id: 'error-codes', + title: 'API uses structured error codes', + failureTitle: 'API lacks structured error codes', + description: + 'Structured error codes (e.g., {"error": "...", "code": "...", "message": "..."}) ' + + 'allow AI agents to programmatically interpret failures and take corrective action ' + + 'rather than attempting to parse free-text error messages.', + requiredGatherers: ['api'], + scoreDisplayMode: 'binary', + }; + + async audit(artifacts: Record): Promise { + const api = artifacts['api'] as ApiGatherResult; + + if (api.errorCodeStructured) { + return this.pass({ + type: 'text', + summary: + 'Structured error codes detected in API specification. ' + + 'Error responses include machine-readable code and message fields.', + }); + } + + return this.fail({ + type: 'text', + summary: + 'No structured error codes found. 
Define error response schemas with ' + + '"error", "code", and "message" fields in your API specification.', + }); + } +} diff --git a/src/audits/index.ts b/src/audits/index.ts index f926d48..1fd7410 100644 --- a/src/audits/index.ts +++ b/src/audits/index.ts @@ -1,2 +1,33 @@ export { BaseAudit } from './base-audit.js'; export type { AuditMeta } from './base-audit.js'; + +// Discovery +export { LlmsTxtAudit } from './llms-txt.js'; +export { OpenapiSpecAudit } from './openapi-spec.js'; +export { RobotsAiAudit } from './robots-ai.js'; +export { AiPluginAudit } from './ai-plugin.js'; +export { SchemaOrgAudit } from './schema-org.js'; + +// API Quality +export { OpenapiValidAudit } from './openapi-valid.js'; +export { ResponseFormatAudit } from './response-format.js'; +export { ResponseExamplesAudit } from './response-examples.js'; +export { ContentNegotiationAudit } from './content-negotiation.js'; + +// Structured Data +export { JsonLdAudit } from './json-ld.js'; +export { MetaTagsAudit } from './meta-tags.js'; +export { SemanticHtmlAudit } from './semantic-html.js'; + +// Auth & Onboarding +export { SelfServiceAuthAudit } from './self-service-auth.js'; +export { NoCaptchaAudit } from './no-captcha.js'; + +// Error Handling +export { ErrorCodesAudit } from './error-codes.js'; +export { RateLimitHeadersAudit } from './rate-limit-headers.js'; +export { RetryAfterAudit } from './retry-after.js'; + +// Documentation +export { MachineReadableDocsAudit } from './machine-readable-docs.js'; +export { SdkAvailableAudit } from './sdk-available.js'; diff --git a/src/audits/json-ld.ts b/src/audits/json-ld.ts new file mode 100644 index 0000000..0ac6671 --- /dev/null +++ b/src/audits/json-ld.ts @@ -0,0 +1,56 @@ +import type { AuditResult, AuditDetails } from '../types.js'; +import type { GatherResult } from '../gatherers/base-gatherer.js'; +import type { HtmlGatherResult } from '../gatherers/html-gatherer.js'; +import { BaseAudit, type AuditMeta } from './base-audit.js'; + +/** + * 
Checks if the page contains valid JSON-LD structured data. + * JSON-LD helps AI agents understand page content and entity relationships. + */ +export class JsonLdAudit extends BaseAudit { + meta: AuditMeta = { + id: 'json-ld', + title: 'Page has JSON-LD structured data', + failureTitle: 'Page is missing JSON-LD structured data', + description: + 'JSON-LD provides machine-readable structured data that helps AI agents ' + + 'understand page content, entities, and relationships without parsing HTML.', + requiredGatherers: ['html'], + scoreDisplayMode: 'binary', + }; + + async audit(artifacts: Record): Promise { + const html = artifacts['html'] as HtmlGatherResult; + const jsonLdBlocks = html.jsonLd; + + if (jsonLdBlocks.length > 0) { + const types = jsonLdBlocks + .filter( + (block): block is Record => + typeof block === 'object' && block !== null + ) + .map((block) => block['@type'] as string | undefined) + .filter(Boolean); + + const details: AuditDetails = { + type: 'table', + items: [ + { + blockCount: jsonLdBlocks.length, + types: types.join(', ') || 'unknown', + }, + ], + summary: `Found ${jsonLdBlocks.length} JSON-LD block(s)`, + }; + + return this.pass(details); + } + + return this.fail({ + type: 'text', + summary: + 'No JSON-LD structured data found. Add