From 6ed90ccd5192b58403561984792b7665bb2bd30b Mon Sep 17 00:00:00 2001 From: Geraint Date: Fri, 27 Jun 2025 13:35:03 +0100 Subject: [PATCH] Allow newlines in PAX header value The `.tar.gz`s generated by Apple sometimes have newlines in the PAX header value, but the previous RegExp didn't allow this. This also checks for the `TextDecoder` class and uses that for UTF-8 decoding if possible, since it's faster and also covers a few more edge-cases. --- src/untar-worker.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/untar-worker.js b/src/untar-worker.js index 743597a..c972e65 100644 --- a/src/untar-worker.js +++ b/src/untar-worker.js @@ -57,6 +57,8 @@ if (typeof self !== "undefined") { // Source: https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330 // Unmarshals an Uint8Array to string. function decodeUTF8(bytes) { + // use built-in one if possible + if (typeof TextDecoder === 'function') return new TextDecoder().decode(bytes); var s = ''; var i = 0; while (i < bytes.length) { @@ -106,7 +108,7 @@ PaxHeader.parse = function(buffer) { // Decode bytes up to the first space character; that is the total field length var fieldLength = parseInt(decodeUTF8(bytes.subarray(0, bytes.indexOf(0x20)))); var fieldText = decodeUTF8(bytes.subarray(0, fieldLength)); - var fieldMatch = fieldText.match(/^\d+ ([^=]+)=(.*)\n$/); + var fieldMatch = fieldText.match(/^\d+ ([^=]+)=((.|\r|\n)*)\n$/); if (fieldMatch === null) { throw new Error("Invalid PAX header data format.");