Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 161 additions & 29 deletions packages/vinext/src/shims/fetch-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import { AsyncLocalStorage } from "node:async_hooks";
const HEADER_BLOCKLIST = ["traceparent", "tracestate"];

// Cache key version — bump when changing the key format to bust stale entries
const CACHE_KEY_PREFIX = "v2";
const CACHE_KEY_PREFIX = "v3";
const MAX_CACHE_KEY_BODY_BYTES = 1024 * 1024; // 1 MiB

class BodyTooLargeForCacheKeyError extends Error {
Expand All @@ -46,6 +46,12 @@ class BodyTooLargeForCacheKeyError extends Error {
}
}

/**
 * Internal signal that a request body could not be serialized into a cache
 * key (e.g. an unreadable stream or malformed form data). Callers catch this
 * and fall back to an uncached fetch instead of failing the request.
 */
class SkipCacheKeyGenerationError extends Error {
  constructor() {
    super("Fetch body could not be serialized for cache key generation");
    // Explicit name so logs and serialized errors identify this class
    // instead of reporting a generic "Error".
    this.name = "SkipCacheKeyGenerationError";
  }
}

/**
* Collect all headers from the request, excluding the blocklist.
* Merges headers from both the Request object and the init object,
Expand Down Expand Up @@ -87,19 +93,114 @@ function hasAuthHeaders(input: string | URL | Request, init?: RequestInit): bool
return AUTH_HEADERS.some((name) => name in headers);
}

async function serializeFormData(
formData: FormData,
pushBodyChunk: (chunk: string) => void,
getTotalBodyBytes: () => number,
): Promise<void> {
for (const [key, val] of formData.entries()) {
if (typeof val === "string") {
pushBodyChunk(JSON.stringify([key, { kind: "string", value: val }]));
continue;
}
if (val.size > MAX_CACHE_KEY_BODY_BYTES || getTotalBodyBytes() + val.size > MAX_CACHE_KEY_BODY_BYTES) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File size check doesn't account for JSON serialization overhead.

The size check uses val.size (raw file bytes), but pushBodyChunk then calls JSON.stringify(...) which adds the key name, {kind: "file", name: ..., type: ..., value: ...} wrapper, and JSON escaping overhead. For a file right at the size limit, the serialized JSON string will be larger than val.size, potentially exceeding the limit without being caught by this pre-check.

In practice this is unlikely to matter (the pushBodyChunk function itself also checks totalBodyBytes against the limit), but the pre-check here is misleadingly precise. Not blocking — just noting the discrepancy.

throw new BodyTooLargeForCacheKeyError();
}
pushBodyChunk(JSON.stringify([key, {
kind: "file",
name: val.name,
type: val.type,
value: await val.text(),
}]));
}
}

type ParsedFormContentType = "multipart/form-data" | "application/x-www-form-urlencoded";

/**
 * Normalize a content-type header to one of the two form media types that can
 * be parsed via `Request.formData()`, or undefined for anything else.
 * Parameters after the media type (charset, boundary, …) are ignored when
 * matching; the media type comparison is case-insensitive.
 */
function getParsedFormContentType(contentType: string | undefined): ParsedFormContentType | undefined {
  if (contentType === undefined) return undefined;
  const mediaType = contentType.split(";")[0]?.trim().toLowerCase();
  switch (mediaType) {
    case "multipart/form-data":
    case "application/x-www-form-urlencoded":
      return mediaType;
    default:
      return undefined;
  }
}

/**
 * Canonicalize a multipart content-type header by dropping its `boundary`
 * parameter, which is randomly generated per request and would otherwise make
 * every multipart upload produce a distinct cache key. The media type is
 * lowercased; all other parameters are kept, trimmed, in their original order.
 */
function stripMultipartBoundary(contentType: string): string {
  const segments = contentType.split(";");
  const mediaType = segments[0].trim().toLowerCase();
  const retained: string[] = [];
  for (const raw of segments.slice(1)) {
    const param = raw.trim();
    if (param && !/^boundary\s*=/i.test(param)) {
      retained.push(param);
    }
  }
  if (retained.length === 0) {
    return mediaType;
  }
  return `${mediaType}; ${retained.join("; ")}`;
}

/** Result of serializing a request body for cache key generation. */
interface SerializedBodyResult {
  // String chunks that together represent the body inside the cache key.
  bodyChunks: string[];
  // When set, this value replaces the request's content-type header in the
  // cache key (e.g. with the multipart boundary stripped, so identical
  // uploads that differ only in boundary share a key).
  canonicalizedContentType?: string;
}

/**
 * Read a Request's body — from a clone, so the caller's stream remains
 * usable — while enforcing the cache-key size budget.
 *
 * The body is rejected early when a parseable content-length header already
 * exceeds the limit, and again during streaming once more than
 * MAX_CACHE_KEY_BODY_BYTES have actually been read. On any failure the
 * reader is cancelled best-effort so the underlying source can release
 * resources.
 *
 * @throws BodyTooLargeForCacheKeyError when the body exceeds the budget
 */
async function readRequestBodyChunksWithinLimit(request: Request): Promise<{
  chunks: Uint8Array[];
  contentType: string | undefined;
}> {
  const declaredLength = request.headers.get("content-length");
  if (declaredLength) {
    const parsed = Number(declaredLength);
    if (Number.isFinite(parsed) && parsed > MAX_CACHE_KEY_BODY_BYTES) {
      throw new BodyTooLargeForCacheKeyError();
    }
  }

  const requestClone = request.clone();
  const contentType = requestClone.headers.get("content-type") ?? undefined;
  const reader = requestClone.body?.getReader();
  if (!reader) {
    return { chunks: [], contentType };
  }

  const chunks: Uint8Array[] = [];
  let bytesRead = 0;
  try {
    for (;;) {
      const result = await reader.read();
      if (result.done) break;
      bytesRead += result.value.byteLength;
      if (bytesRead > MAX_CACHE_KEY_BODY_BYTES) {
        throw new BodyTooLargeForCacheKeyError();
      }
      chunks.push(result.value);
    }
  } catch (err) {
    // Cancellation is best-effort; the original error is what matters.
    void reader.cancel().catch(() => {});
    throw err;
  }

  return { chunks, contentType };
}

/**
* Serialize request body into string chunks for cache key inclusion.
* Handles all body types: string, Uint8Array, ReadableStream, FormData, Blob.
* Handles all body types: string, Uint8Array, ReadableStream, FormData, Blob,
* and Request object bodies.
* Returns the serialized body chunks and optionally stashes the original body
* on init as `_ogBody` so it can still be used after stream consumption.
*/
async function serializeBody(init?: RequestInit): Promise<string[]> {
if (!init?.body) return [];
async function serializeBody(input: string | URL | Request, init?: RequestInit): Promise<SerializedBodyResult> {
if (!init?.body && !(input instanceof Request && input.body)) {
return { bodyChunks: [] };
}

const bodyChunks: string[] = [];
const encoder = new TextEncoder();
const decoder = new TextDecoder();
let totalBodyBytes = 0;
let canonicalizedContentType: string | undefined;

const pushBodyChunk = (chunk: string): void => {
totalBodyBytes += encoder.encode(chunk).byteLength;
Expand All @@ -108,14 +209,15 @@ async function serializeBody(init?: RequestInit): Promise<string[]> {
}
bodyChunks.push(chunk);
};
const getTotalBodyBytes = (): number => totalBodyBytes;

if (init.body instanceof Uint8Array) {
if (init?.body instanceof Uint8Array) {
if (init.body.byteLength > MAX_CACHE_KEY_BODY_BYTES) {
throw new BodyTooLargeForCacheKeyError();
}
pushBodyChunk(decoder.decode(init.body));
(init as any)._ogBody = init.body;
} else if (typeof (init.body as any).getReader === "function") {
} else if (init?.body && typeof (init.body as any).getReader === "function") {
// ReadableStream
const readableBody = init.body as ReadableStream<Uint8Array | string>;
const [bodyForHashing, bodyForFetch] = readableBody.tee();
Expand Down Expand Up @@ -147,32 +249,18 @@ async function serializeBody(init?: RequestInit): Promise<string[]> {
if (err instanceof BodyTooLargeForCacheKeyError) {
throw err;
}
console.error("[vinext] Problem reading body for cache key", err);
throw new SkipCacheKeyGenerationError();
}
} else if (init.body instanceof URLSearchParams) {
} else if (init?.body instanceof URLSearchParams) {
// URLSearchParams — .toString() gives a stable serialization
(init as any)._ogBody = init.body;
pushBodyChunk(init.body.toString());
} else if (typeof (init.body as any).keys === "function") {
} else if (init?.body && typeof (init.body as any).keys === "function") {
// FormData
const formData = init.body as FormData;
(init as any)._ogBody = init.body;
for (const key of new Set(formData.keys())) {
const values = formData.getAll(key);
const serializedValues = await Promise.all(
values.map(async (val) => {
if (typeof val === "string") return val;
if (val.size > MAX_CACHE_KEY_BODY_BYTES || totalBodyBytes + val.size > MAX_CACHE_KEY_BODY_BYTES) {
throw new BodyTooLargeForCacheKeyError();
}
// Note: File name/type/lastModified are not included — only content.
// Two Files with identical content but different names produce the same key.
return await val.text();
})
);
pushBodyChunk(`${key}=${serializedValues.join(",")}`);
}
} else if (typeof (init.body as any).arrayBuffer === "function") {
await serializeFormData(formData, pushBodyChunk, getTotalBodyBytes);
} else if (init?.body && typeof (init.body as any).arrayBuffer === "function") {
// Blob
const blob = init.body as Blob;
if (blob.size > MAX_CACHE_KEY_BODY_BYTES) {
Expand All @@ -181,17 +269,58 @@ async function serializeBody(init?: RequestInit): Promise<string[]> {
pushBodyChunk(await blob.text());
const arrayBuffer = await blob.arrayBuffer();
(init as any)._ogBody = new Blob([arrayBuffer], { type: blob.type });
} else if (typeof init.body === "string") {
} else if (typeof init?.body === "string") {
// String length is always <= UTF-8 byte length, so this is a
// cheap lower-bound check that avoids encoder.encode() for huge strings.
if (init.body.length > MAX_CACHE_KEY_BODY_BYTES) {
throw new BodyTooLargeForCacheKeyError();
}
pushBodyChunk(init.body);
(init as any)._ogBody = init.body;
} else if (input instanceof Request && input.body) {
let chunks: Uint8Array[];
let contentType: string | undefined;
try {
({ chunks, contentType } = await readRequestBodyChunksWithinLimit(input));
} catch (err) {
if (err instanceof BodyTooLargeForCacheKeyError) {
throw err;
}
throw new SkipCacheKeyGenerationError();
}
const formContentType = getParsedFormContentType(contentType);

if (formContentType) {
try {
const boundedRequest = new Request(input.url, {
method: input.method,
headers: contentType ? { "content-type": contentType } : undefined,
body: new Blob(chunks as unknown as BlobPart[]),
});
const formData = await boundedRequest.formData();
await serializeFormData(formData, pushBodyChunk, getTotalBodyBytes);
canonicalizedContentType = formContentType === "multipart/form-data" && contentType
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Boundary stripping only applies to multipart/form-data, not application/x-www-form-urlencoded.

This is correct behavior (urlencoded doesn't have boundaries), but the conditional reads a bit awkwardly. Consider adding a brief comment explaining the asymmetry, since a reader might wonder why only multipart gets canonicalizedContentType treatment.

Also: when formContentType is "application/x-www-form-urlencoded", canonicalizedContentType stays undefined, so the original content-type header (including any charset=... params) remains in the cache key via collectHeaders. The test on line 525 ("urlencoded Request bodies with different charset headers get separate cache entries") confirms this works correctly. Good.

? stripMultipartBoundary(contentType)
: undefined;
return { bodyChunks, canonicalizedContentType };
} catch (err) {
if (err instanceof BodyTooLargeForCacheKeyError) {
throw err;
}
throw new SkipCacheKeyGenerationError();
}
}

for (const chunk of chunks) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-form Request bodies don't go through pushBodyChunk for size checking.

When a Request body has a non-form content type, the chunks are decoded and pushed directly into bodyChunks (line 315) without going through pushBodyChunk, which means totalBodyBytes is not updated and the per-chunk size limit in pushBodyChunk is never hit. Only the final trailing chunk on line 319 goes through pushBodyChunk.

This is partially mitigated by the streaming size check in readRequestBodyChunksWithinLimit (which enforces the 1 MiB limit during reading), so this won't cause an actual over-limit key. But it means totalBodyBytes is inaccurate for the rest of the function, which could matter if future code depends on it.

Suggested change
for (const chunk of chunks) {
for (const chunk of chunks) {
pushBodyChunk(decoder.decode(chunk, { stream: true }));
}

pushBodyChunk(decoder.decode(chunk, { stream: true }));
}
const finalChunk = decoder.decode();
if (finalChunk) {
pushBodyChunk(finalChunk);
}
}

return bodyChunks;
return { bodyChunks, canonicalizedContentType };
}

/**
Expand All @@ -218,7 +347,10 @@ async function buildFetchCacheKey(input: string | URL | Request, init?: RequestI
if (init?.method) method = init.method;

const headers = collectHeaders(input, init);
const bodyChunks = await serializeBody(init);
const { bodyChunks, canonicalizedContentType } = await serializeBody(input, init);
if (canonicalizedContentType) {
headers["content-type"] = canonicalizedContentType;
}

const cacheString = JSON.stringify([
CACHE_KEY_PREFIX,
Expand Down Expand Up @@ -386,7 +518,7 @@ function createPatchedFetch(): typeof globalThis.fetch {
try {
cacheKey = await buildFetchCacheKey(input, init);
} catch (err) {
if (err instanceof BodyTooLargeForCacheKeyError) {
if (err instanceof BodyTooLargeForCacheKeyError || err instanceof SkipCacheKeyGenerationError) {
const cleanInit = stripNextFromInit(init);
return originalFetch(input, cleanInit);
}
Expand Down
Loading