From a68b2c2b9c1a07c6c9ea721be8ccb630959cec23 Mon Sep 17 00:00:00 2001 From: VivianHublo Date: Mon, 4 Aug 2025 14:08:59 +0200 Subject: [PATCH 1/3] perf: uint8array set method instead of spread --- packages/hub/src/utils/XetBlob.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/hub/src/utils/XetBlob.ts b/packages/hub/src/utils/XetBlob.ts index 0f817d4ca6..7c0e9dc2ec 100644 --- a/packages/hub/src/utils/XetBlob.ts +++ b/packages/hub/src/utils/XetBlob.ts @@ -327,8 +327,11 @@ export class XetBlob extends Blob { totalFetchBytes += result.value.byteLength; if (leftoverBytes) { - result.value = new Uint8Array([...leftoverBytes, ...result.value]); - leftoverBytes = undefined; + const leftoverBytesLength: number = leftoverBytes.length; + const combinedBytes = new Uint8Array(leftoverBytesLength + result.value.length); + combinedBytes.set(leftoverBytes); + combinedBytes.set(result.value, leftoverBytesLength); + result.value = combinedBytes; } while (totalBytesRead < maxBytes && result.value.byteLength) { From 6753962eea99f429092f70fcfc07aefa6b86876f Mon Sep 17 00:00:00 2001 From: Vivian Date: Sat, 30 Aug 2025 14:19:49 +0200 Subject: [PATCH 2/3] feat: add combineUint8Arrays utility and corresponding tests --- packages/hub/src/utils/XetBlob.ts | 36 +++++++++---------- .../hub/src/utils/combineUint8Arrays.spec.ts | 15 ++++++++ packages/hub/src/utils/combineUint8Arrays.ts | 10 ++++++ 3 files changed, 41 insertions(+), 20 deletions(-) create mode 100644 packages/hub/src/utils/combineUint8Arrays.spec.ts create mode 100644 packages/hub/src/utils/combineUint8Arrays.ts diff --git a/packages/hub/src/utils/XetBlob.ts b/packages/hub/src/utils/XetBlob.ts index 7c0e9dc2ec..266bac3f50 100644 --- a/packages/hub/src/utils/XetBlob.ts +++ b/packages/hub/src/utils/XetBlob.ts @@ -1,6 +1,7 @@ import { createApiError } from "../error"; import type { CredentialsParams } from "../types/public"; import { checkCredentials } from "./checkCredentials"; +import { combineUint8Arrays } from "./combineUint8Arrays"; import { decompress as lz4_decompress } from "../vendor/lz4js"; import { RangeList } from "./RangeList"; @@ -201,7 +202,7 @@ export class XetBlob extends Blob { rangeList.add(term.range.start, term.range.end); } const listener = this.listener; - const log = this.internalLogging ? (...args: unknown[]) => console.log(...args) : () => {}; + const log = this.internalLogging ? (...args: unknown[]) => console.log(...args) : () => { }; async function* readData( reconstructionInfo: ReconstructionInfo, @@ -327,14 +328,11 @@ export class XetBlob extends Blob { totalFetchBytes += result.value.byteLength; if (leftoverBytes) { - const leftoverBytesLength: number = leftoverBytes.length; - const combinedBytes = new Uint8Array(leftoverBytesLength + result.value.length); - combinedBytes.set(leftoverBytes); - combinedBytes.set(result.value, leftoverBytesLength); - result.value = combinedBytes; + result.value = combineUint8Arrays(leftoverBytes, result.value); + leftoverBytes = undefined; } - while (totalBytesRead < maxBytes && result.value.byteLength) { + while (totalBytesRead < maxBytes && result.value?.byteLength) { if (result.value.byteLength < 8) { // We need 8 bytes to parse the chunk header leftoverBytes = result.value; @@ -361,8 +359,7 @@ export class XetBlob extends Blob { chunkHeader.compression_scheme !== XetChunkCompressionScheme.ByteGroupingLZ4 ) { throw new Error( - `Unsupported compression scheme ${ - compressionSchemeLabels[chunkHeader.compression_scheme] ?? chunkHeader.compression_scheme + `Unsupported compression scheme ${compressionSchemeLabels[chunkHeader.compression_scheme] ?? chunkHeader.compression_scheme }` ); } @@ -379,13 +376,13 @@ export class XetBlob extends Blob { chunkHeader.compression_scheme === XetChunkCompressionScheme.LZ4 ? lz4_decompress(result.value.slice(0, chunkHeader.compressed_length), chunkHeader.uncompressed_length) : chunkHeader.compression_scheme === XetChunkCompressionScheme.ByteGroupingLZ4 - ? bg4_regroup_bytes( - lz4_decompress( - result.value.slice(0, chunkHeader.compressed_length), - chunkHeader.uncompressed_length - ) - ) - : result.value.slice(0, chunkHeader.compressed_length); + ? bg4_regroup_bytes( + lz4_decompress( + result.value.slice(0, chunkHeader.compressed_length), + chunkHeader.uncompressed_length + ) + ) + : result.value.slice(0, chunkHeader.compressed_length); const range = ranges.find((range) => chunkIndex >= range.start && chunkIndex < range.end); const shouldYield = chunkIndex >= term.range.start && chunkIndex < term.range.end; @@ -439,8 +436,7 @@ export class XetBlob extends Blob { log("done", done, "total read", totalBytesRead, maxBytes, totalFetchBytes); log("failed to fetch all data for term", term.hash); throw new Error( - `Failed to fetch all data for term ${term.hash}, fetched ${totalFetchBytes} bytes out of ${ - fetchInfo.url_range.end - fetchInfo.url_range.start + 1 + `Failed to fetch all data for term ${term.hash}, fetched ${totalFetchBytes} bytes out of ${fetchInfo.url_range.end - fetchInfo.url_range.start + 1 }` ); } @@ -651,8 +647,8 @@ async function getAccessToken( headers: { ...(initialAccessToken ? { - Authorization: `Bearer ${initialAccessToken}`, - } + Authorization: `Bearer ${initialAccessToken}`, + } : {}), }, }); diff --git a/packages/hub/src/utils/combineUint8Arrays.spec.ts b/packages/hub/src/utils/combineUint8Arrays.spec.ts new file mode 100644 index 0000000000..96c806a288 --- /dev/null +++ b/packages/hub/src/utils/combineUint8Arrays.spec.ts @@ -0,0 +1,15 @@ +import { describe, it, expect } from "vitest"; +import { combineUint8Arrays } from "./combineUint8Arrays"; + +describe("combineUint8Arrays", () => { + it.each([ + { a: [], b: [], expected: [] }, + { a: [], b: [1, 2, 3], expected: [1, 2, 3] }, + { a: [4, 5, 6], b: [], expected: [4, 5, 6] }, + { a: [7, 8], b: [9, 10], expected: [7, 8, 9, 10] }, + { a: [1], b: [2, 3, 4], expected: [1, 2, 3, 4] }, + ])("combines $a and $b to $expected", ({ a, b, expected }) => { + const result = combineUint8Arrays(new Uint8Array(a), new Uint8Array(b)); + expect(result).toEqual(new Uint8Array(expected)); + }); +}); diff --git a/packages/hub/src/utils/combineUint8Arrays.ts b/packages/hub/src/utils/combineUint8Arrays.ts new file mode 100644 index 0000000000..c5ff60571f --- /dev/null +++ b/packages/hub/src/utils/combineUint8Arrays.ts @@ -0,0 +1,10 @@ +export function combineUint8Arrays( + a: Uint8Array, + b: Uint8Array +): Uint8Array { + const aLength = a.length; + const combinedBytes = new Uint8Array(aLength + b.length); + combinedBytes.set(a); + combinedBytes.set(b, aLength); + return combinedBytes; +} From 688dd779246947f5615ca528f81d796dcce73216 Mon Sep 17 00:00:00 2001 From: Vivian Date: Mon, 1 Sep 2025 09:13:53 +0200 Subject: [PATCH 3/3] chore: format issue --- packages/hub/src/utils/XetBlob.ts | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/hub/src/utils/XetBlob.ts b/packages/hub/src/utils/XetBlob.ts index 266bac3f50..027477c4a2 100644 --- a/packages/hub/src/utils/XetBlob.ts +++ b/packages/hub/src/utils/XetBlob.ts @@ -202,7 +202,7 @@ export class XetBlob extends Blob { rangeList.add(term.range.start, term.range.end); } const listener = this.listener; - const log = this.internalLogging ? (...args: unknown[]) => console.log(...args) : () => { }; + const log = this.internalLogging ? (...args: unknown[]) => console.log(...args) : () => {}; async function* readData( reconstructionInfo: ReconstructionInfo, @@ -359,7 +359,8 @@ export class XetBlob extends Blob { chunkHeader.compression_scheme !== XetChunkCompressionScheme.ByteGroupingLZ4 ) { throw new Error( - `Unsupported compression scheme ${compressionSchemeLabels[chunkHeader.compression_scheme] ?? chunkHeader.compression_scheme + `Unsupported compression scheme ${ + compressionSchemeLabels[chunkHeader.compression_scheme] ?? chunkHeader.compression_scheme }` ); } @@ -376,13 +377,13 @@ export class XetBlob extends Blob { chunkHeader.compression_scheme === XetChunkCompressionScheme.LZ4 ? lz4_decompress(result.value.slice(0, chunkHeader.compressed_length), chunkHeader.uncompressed_length) : chunkHeader.compression_scheme === XetChunkCompressionScheme.ByteGroupingLZ4 - ? bg4_regroup_bytes( - lz4_decompress( - result.value.slice(0, chunkHeader.compressed_length), - chunkHeader.uncompressed_length - ) - ) - : result.value.slice(0, chunkHeader.compressed_length); + ? bg4_regroup_bytes( + lz4_decompress( + result.value.slice(0, chunkHeader.compressed_length), + chunkHeader.uncompressed_length + ) + ) + : result.value.slice(0, chunkHeader.compressed_length); const range = ranges.find((range) => chunkIndex >= range.start && chunkIndex < range.end); const shouldYield = chunkIndex >= term.range.start && chunkIndex < term.range.end; @@ -436,7 +437,8 @@ export class XetBlob extends Blob { log("done", done, "total read", totalBytesRead, maxBytes, totalFetchBytes); log("failed to fetch all data for term", term.hash); throw new Error( - `Failed to fetch all data for term ${term.hash}, fetched ${totalFetchBytes} bytes out of ${fetchInfo.url_range.end - fetchInfo.url_range.start + 1 + `Failed to fetch all data for term ${term.hash}, fetched ${totalFetchBytes} bytes out of ${ + fetchInfo.url_range.end - fetchInfo.url_range.start + 1 }` ); } @@ -647,8 +649,8 @@ async function getAccessToken( headers: { ...(initialAccessToken ? { - Authorization: `Bearer ${initialAccessToken}`, - } + Authorization: `Bearer ${initialAccessToken}`, + } : {}), }, });