From 46d374ef8eff476d5941e525f2192b6043d3bc85 Mon Sep 17 00:00:00 2001
From: adilburaksen
Date: Mon, 20 Apr 2026 22:02:42 +0300
Subject: [PATCH 1/3] security: validate QCOW2 ClusterBits in parseHeader to
 prevent OOM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ClusterBits is read directly from the untrusted header and used as a
shift exponent: clusterSize = uint64(1) << header.ClusterBits. This
value is then passed to make() and bytes.Repeat() without any bounds
check. A 116-byte crafted .qcow2 file with ClusterBits=33 causes
scalibr to attempt an 8 GB heap allocation inside readL2Table, crashing
the process with "runtime: out of memory".

Fix: reject ClusterBits outside the QCOW2 specification range [9, 21]
(512 B – 2 MB clusters) in parseHeader, before any clusterSize is
computed.

After fix: the same 116-byte input returns an error immediately with a
0 MB TotalAlloc delta.

Adds security regression test TestConvertQCOW2ClusterBitsRejected.

Co-Authored-By: Claude Sonnet 4.6 (1M context)
---
 .../filesystem/embeddedfs/qcow2/format.go     |   6 +
 .../qcow2/security_regression_test.go         | 109 ++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 extractor/filesystem/embeddedfs/qcow2/security_regression_test.go

diff --git a/extractor/filesystem/embeddedfs/qcow2/format.go b/extractor/filesystem/embeddedfs/qcow2/format.go
index 104fd120c..675d679e2 100644
--- a/extractor/filesystem/embeddedfs/qcow2/format.go
+++ b/extractor/filesystem/embeddedfs/qcow2/format.go
@@ -155,6 +155,12 @@ func parseHeader(reader io.Reader) (*header, []headerExtension, error) {
 	if h.CompressionType != 0 {
 		return nil, nil, fmt.Errorf("unsupported compression type: %d (only zlib supported)", h.CompressionType)
 	}
+	// QCOW2 spec: ClusterBits must be 9–21 (512 B – 2 MB clusters).
+	// An unchecked field lets an attacker trigger make([]byte, 1<<ClusterBits)
+	// OOM in readL2Table.
+	if h.ClusterBits < 9 || h.ClusterBits > 21 {
+		return nil, nil, fmt.Errorf("ClusterBits %d out of valid range [9, 21]", h.ClusterBits)
+	}

 	if h.Version >= 3 && h.HeaderLength > 112 {
 		if _, err := reader.(io.Seeker).Seek(int64(h.HeaderLength), io.SeekStart); err != nil {
diff --git a/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
new file mode 100644
index 000000000..3f0eb9255
--- /dev/null
+++ b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
@@ -0,0 +1,109 @@
+// Copyright 2026 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package qcow2
+
+import (
+	"encoding/binary"
+	"os"
+	"runtime"
+	"testing"
+)
+
+// craftMaliciousQCOW2 builds a minimal 116-byte QCOW2 file whose ClusterBits
+// field is set to clusterBits, causing clusterSize = 1 << clusterBits.
+// The file passes all parseHeader checks and reaches readL2Table where
+// make([]byte, clusterSize) is called with no bounds check.
+func craftMaliciousQCOW2(t *testing.T, clusterBits uint32) string {
+	t.Helper()
+
+	// QCOW2 header is 112 bytes, big-endian.
+	// Offsets match the header struct fields in format.go.
+	hdr := make([]byte, 112)
+	be := binary.BigEndian
+
+	be.PutUint32(hdr[0:], 0x514649FB) // Magic
+	be.PutUint32(hdr[4:], 3)          // Version = 3
+	// hdr[8:16]  backing_file_offset = 0
+	// hdr[16:20] backing_file_size = 0
+	be.PutUint32(hdr[20:], clusterBits)    // ClusterBits
+	be.PutUint64(hdr[24:], 1<<clusterBits) // Size: one cluster of virtual disk
+	be.PutUint32(hdr[36:], 1)              // L1Size: a single entry
+	be.PutUint64(hdr[40:], 112)            // L1TableOffset: directly after the header
+	be.PutUint32(hdr[96:], 4)              // RefcountOrder
+	be.PutUint32(hdr[100:], 112)           // HeaderLength
+
+	// Append half an L1 entry so the file reaches readL2Table;
+	// total file size is 112 + 4 = 116 bytes.
+	data := append(hdr, 0x00, 0x00, 0x00, 0x00)
+
+	path := t.TempDir() + "/malicious.qcow2"
+	if err := os.WriteFile(path, data, 0600); err != nil {
+		t.Fatalf("os.WriteFile: %v", err)
+	}
+	return path
+}
+
+func TestConvertQCOW2ClusterBitsRejected(t *testing.T) {
+	const clusterBits = 33
+	inputPath := craftMaliciousQCOW2(t, clusterBits)
+	outputPath := t.TempDir() + "/out.raw"
+	t.Logf("ClusterBits: %d => clusterSize = %d bytes (~%d MB)", clusterBits, uint64(1)<<clusterBits, (uint64(1)<<clusterBits)>>20)
+
+	var msBefore, msAfter runtime.MemStats
+	runtime.ReadMemStats(&msBefore)
+
+	convertErr := convertQCOW2ToRaw(inputPath, outputPath, "")
+
+	runtime.ReadMemStats(&msAfter)
+	allocMB := (msAfter.TotalAlloc - msBefore.TotalAlloc) >> 20
+
+	t.Logf("convertQCOW2ToRaw err: %v", convertErr)
+	t.Logf("TotalAlloc delta: %d MB", allocMB)
+
+	if convertErr == nil {
+		t.Errorf("expected error for malicious ClusterBits=%d, got nil", clusterBits)
+	}
+	if allocMB > 50 {
+		t.Errorf("TotalAlloc delta %d MB exceeds 50 MB — bounds check is missing or ineffective", allocMB)
+	}
+}
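
To see why the [9, 21] range matters, here is a minimal standalone sketch
(hypothetical validateClusterBits helper, not code from the repository) of
how an attacker-controlled shift exponent becomes a multi-gigabyte
allocation request, and how a bounds check like the one in this patch caps
it:

package main

import "fmt"

// validateClusterBits mirrors the parseHeader guard: 2^9 = 512 B up to
// 2^21 = 2 MB clusters, the range allowed by the QCOW2 specification.
func validateClusterBits(clusterBits uint32) (uint64, error) {
	if clusterBits < 9 || clusterBits > 21 {
		return 0, fmt.Errorf("ClusterBits %d out of valid range [9, 21]", clusterBits)
	}
	return 1 << clusterBits, nil
}

func main() {
	for _, bits := range []uint32{16, 33} {
		size, err := validateClusterBits(bits)
		if err != nil {
			// Without the guard, ClusterBits=33 would reach
			// make([]byte, 1<<33): an 8 GB allocation.
			fmt.Println("rejected:", err)
			continue
		}
		fmt.Printf("ClusterBits=%d -> clusterSize=%d bytes\n", bits, size)
	}
}

Because the guard runs before clusterSize is ever computed, the worst an
attacker can force is a 2 MB cluster buffer.
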
From 36260d72a444ca3276b2c69dbb796e2b97d1ed5f Mon Sep 17 00:00:00 2001
From: adilburaksen
Date: Mon, 20 Apr 2026 22:12:08 +0300
Subject: [PATCH 2/3] security: fix QCOW2 header field bounds checks to
 prevent OOM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three header fields are used as allocation sizes without validation:

1. ClusterBits — used as shift exponent: 1<<ClusterBits (bounded by the
   previous commit).
2. L1Size — passed to make([]uint64, L1Size).
3. RefcountTableClusters — multiplied by clusterSize in uint32 inside
   readRefcountTable, so large values produce near-4 GB allocations
   even when ClusterBits is valid.

Fix: cap L1Size at 2^21 entries and RefcountTableClusters at 64
clusters in parseHeader, and rework the crafted header in the security
regression test to stay within the new caps.

Co-Authored-By: Claude Sonnet 4.6
---
 .../filesystem/embeddedfs/qcow2/format.go     | 18 ++++-
 .../qcow2/security_regression_test.go         | 47 +++++++++----------
 2 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/extractor/filesystem/embeddedfs/qcow2/format.go b/extractor/filesystem/embeddedfs/qcow2/format.go
index 675d679e2..3c4081cf9 100644
--- a/extractor/filesystem/embeddedfs/qcow2/format.go
+++ b/extractor/filesystem/embeddedfs/qcow2/format.go
@@ -156,11 +156,25 @@ func parseHeader(reader io.Reader) (*header, []headerExtension, error) {
 		return nil, nil, fmt.Errorf("unsupported compression type: %d (only zlib supported)", h.CompressionType)
 	}
 	// QCOW2 spec: ClusterBits must be 9–21 (512 B – 2 MB clusters).
-	// An unchecked field lets an attacker trigger make([]byte, 1<<ClusterBits)
-	// OOM in readL2Table.
+	// An unchecked field lets an attacker trigger make([]byte, 1<<ClusterBits)
+	// OOM in readL2Table (e.g. ClusterBits=33 → 8 GB allocation).
 	if h.ClusterBits < 9 || h.ClusterBits > 21 {
 		return nil, nil, fmt.Errorf("ClusterBits %d out of valid range [9, 21]", h.ClusterBits)
 	}
+	// Cap L1Size: prevents make([]uint64, L1Size) OOM with large values.
+	// A 2M-entry L1 table covers ≥64 TiB even at the minimum cluster size.
+	const maxL1Size = 2 << 20 // 2,097,152
+	if h.L1Size > maxL1Size {
+		return nil, nil, fmt.Errorf("L1Size %d exceeds safety limit %d", h.L1Size, maxL1Size)
+	}
+	// Cap RefcountTableClusters: readRefcountTable computes
+	// header.RefcountTableClusters*uint32(clusterSize) in uint32, so large
+	// values can produce near-4 GB allocations even with valid ClusterBits.
+	// 64 clusters is generous for any real image (typical images use 1–3).
+	const maxRefcountTableClusters = 64
+	if h.RefcountTableClusters > maxRefcountTableClusters {
+		return nil, nil, fmt.Errorf("RefcountTableClusters %d exceeds safety limit %d", h.RefcountTableClusters, maxRefcountTableClusters)
+	}

 	if h.Version >= 3 && h.HeaderLength > 112 {
 		if _, err := reader.(io.Seeker).Seek(int64(h.HeaderLength), io.SeekStart); err != nil {
diff --git a/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
index 3f0eb9255..de5007b39 100644
--- a/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
+++ b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
@@ -33,33 +33,29 @@ func craftMaliciousQCOW2(t *testing.T, clusterBits uint32) string {
 	hdr := make([]byte, 112)
 	be := binary.BigEndian
 
-	be.PutUint32(hdr[0:], 0x514649FB) // Magic
-	be.PutUint32(hdr[4:], 3)          // Version = 3
-	// hdr[8:16]  backing_file_offset = 0
-	// hdr[16:20] backing_file_size = 0
-	be.PutUint32(hdr[20:], clusterBits)    // ClusterBits
-	be.PutUint64(hdr[24:], 1<<clusterBits) // Size: one cluster of virtual disk
-	be.PutUint32(hdr[36:], 1)              // L1Size: a single entry
-	be.PutUint64(hdr[40:], 112)            // L1TableOffset: directly after the header
-	be.PutUint32(hdr[96:], 4)              // RefcountOrder
-	be.PutUint32(hdr[100:], 112)           // HeaderLength
-
-	// Append half an L1 entry so the file reaches readL2Table;
-	// total file size is 112 + 4 = 116 bytes.
-	data := append(hdr, 0x00, 0x00, 0x00, 0x00)
-
+	be.PutUint32(hdr[0:], 0x514649FB)      // Magic
+	be.PutUint32(hdr[4:], 3)               // Version = 3
+	be.PutUint32(hdr[20:], clusterBits)    // ClusterBits
+	be.PutUint64(hdr[24:], 1<<clusterBits) // Size: one cluster of virtual disk
+	be.PutUint32(hdr[36:], 1)              // L1Size: within the new cap
+	be.PutUint64(hdr[40:], 112)            // L1TableOffset: directly after the header
+	be.PutUint32(hdr[56:], 1)              // RefcountTableClusters: within the new cap
+	be.PutUint32(hdr[96:], 4)              // RefcountOrder
+	be.PutUint32(hdr[100:], 112)           // HeaderLength
+	data := append(hdr, 0x00, 0x00, 0x00, 0x00) // half an L1 entry: 116 bytes total
 	path := t.TempDir() + "/malicious.qcow2"
 	if err := os.WriteFile(path, data, 0600); err != nil {
 		t.Fatalf("os.WriteFile: %v", err)
 	}
 	return path
 }
 
 func TestConvertQCOW2ClusterBitsRejected(t *testing.T) {
 	const clusterBits = 33
 	inputPath := craftMaliciousQCOW2(t, clusterBits)
 	outputPath := t.TempDir() + "/out.raw"
-	t.Logf("ClusterBits: %d => clusterSize = %d bytes (~%d MB)", clusterBits, uint64(1)<<clusterBits, (uint64(1)<<clusterBits)>>20)
+	t.Logf("ClusterBits: %d => clusterSize = %d bytes (~%d MB)",
+		clusterBits, uint64(1)<<clusterBits, (uint64(1)<<clusterBits)>>20)
 
 	var msBefore, msAfter runtime.MemStats
 	runtime.ReadMemStats(&msBefore)
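
The RefcountTableClusters cap is about 32-bit arithmetic rather than the
spec. A short standalone sketch (assumed values, not repository code)
showing how the uint32 product in readRefcountTable behaves near and past
4 GiB, which is what the limit of 64 forecloses:

package main

import "fmt"

func main() {
	const clusterSize uint32 = 1 << 21 // 2 MB, the largest valid cluster size

	for _, clusters := range []uint32{3, 64, 2047, 2048} {
		// Same uint32 arithmetic as readRefcountTable's table-size computation.
		size := clusters * clusterSize
		fmt.Printf("RefcountTableClusters=%-4d -> %10d bytes\n", clusters, size)
	}
	// 2047 clusters is a ~4 GiB allocation request that still "fits" in
	// uint32; 2048 * 2 MB is exactly 2^32 and wraps to 0, silently
	// truncating the table read. The cap of 64 rejects both long before
	// any allocation.
}
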
From 555b26b629797a1b7fef262b23f758951fceb676 Mon Sep 17 00:00:00 2001
From: adilburaksen
Date: Sun, 26 Apr 2026 16:16:29 +0300
Subject: [PATCH 3/3] security: cap TAR entry size and gzip decompression in
 archive/OVA parsers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

common.TARToTempDir had no per-entry size limit: a malicious TAR could
declare an arbitrarily large file (hdr.Size), causing io.Copy to
exhaust disk space. The archive extractor's .tar.gz path also lacked a
total decompression cap, allowing a compression bomb (e.g. 1 MB →
100 GB) to exhaust disk on the scanner host.

- extractor/filesystem/embeddedfs/common/common.go
  - Export MaxTAREntryBytes = 2 GiB constant.
  - Check hdr.Size against MaxTAREntryBytes before creating any file.
  - Wrap io.Copy with io.LimitReader(MaxTAREntryBytes+1) and a
    post-copy size check as defence-in-depth.
- extractor/filesystem/embeddedfs/archive/archive.go
  - Add maxGzipDecompressedBytes = 8 GiB constant.
  - Wrap gzip.Reader with io.LimitReader before passing to
    TARToTempDir, capping total decompressed output for .tar.gz files.
- extractor/filesystem/embeddedfs/archive/security_regression_test.go
  - TestTAREntryOverLimitRejected: .tar with hdr.Size > MaxTAREntryBytes.
  - TestGzippedTAREntryOverLimitRejected: .tar.gz with the same
    oversized entry.

The OVA extractor calls TARToTempDir directly and is protected by the
MaxTAREntryBytes guard without needing its own changes.

Co-Authored-By: Claude Sonnet 4.6
---
 .../filesystem/embeddedfs/archive/archive.go  |  11 +-
 .../archive/security_regression_test.go       | 102 ++++++++++++++++++
 .../filesystem/embeddedfs/common/common.go    |  25 ++++-
 3 files changed, 134 insertions(+), 4 deletions(-)
 create mode 100644 extractor/filesystem/embeddedfs/archive/security_regression_test.go

diff --git a/extractor/filesystem/embeddedfs/archive/archive.go b/extractor/filesystem/embeddedfs/archive/archive.go
index b6c45bdf7..fb0b75bac 100644
--- a/extractor/filesystem/embeddedfs/archive/archive.go
+++ b/extractor/filesystem/embeddedfs/archive/archive.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"io"
 	"strings"
 	"sync"
 
@@ -34,6 +35,12 @@ import (
 const (
 	// Name is the unique identifier for the archive extractor.
 	Name = "embeddedfs/archive"
+
+	// maxGzipDecompressedBytes caps total decompressed output for .tar.gz files.
+	// maxFileSizeBytes only checks the compressed size; without this cap a
+	// decompression bomb (e.g. 1 MB → 100 GB) exhausts disk space.
+	// 8 GiB covers the largest realistic OCI image layer.
+	maxGzipDecompressedBytes = 8 << 30 // 8 GiB
 )
 
 // Extractor implements the filesystem.Extractor interface for archive extraction.
@@ -106,7 +113,9 @@ func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (i
 		if err != nil {
 			return inventory.Inventory{}, fmt.Errorf("gzip.NewReader(%q): %w", input.Path, err)
 		}
-		tempDir, err = common.TARToTempDir(reader)
+		// LimitReader caps total decompressed bytes: maxFileSizeBytes checks only
+		// the compressed size, so a small .tar.gz can expand to exhaust disk space.
+		tempDir, err = common.TARToTempDir(io.LimitReader(reader, maxGzipDecompressedBytes))
 		if err != nil {
 			return inventory.Inventory{}, fmt.Errorf("common.TARToTempDir(%q): %w", input.Path, err)
 		}
Name = "embeddedfs/archive" + + // maxGzipDecompressedBytes caps total decompressed output for .tar.gz files. + // maxFileSizeBytes only checks the compressed size; without this cap a + // decompression bomb (e.g. 1 MB → 100 GB) exhausts disk space. + // 8 GiB covers the largest realistic OCI image layer. + maxGzipDecompressedBytes = 8 << 30 // 8 GiB ) // Extractor implements the filesystem.Extractor interface for archive extraction. @@ -106,7 +113,9 @@ func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (i if err != nil { return inventory.Inventory{}, fmt.Errorf("gzip.NewReader(%q): %w", input.Path, err) } - tempDir, err = common.TARToTempDir(reader) + // LimitReader caps total decompressed bytes: maxFileSizeBytes checks only + // the compressed size, so a small .tar.gz can expand to exhaust disk space. + tempDir, err = common.TARToTempDir(io.LimitReader(reader, maxGzipDecompressedBytes)) if err != nil { return inventory.Inventory{}, fmt.Errorf("common.TARToTempDir(%q): %w", input.Path, err) } diff --git a/extractor/filesystem/embeddedfs/archive/security_regression_test.go b/extractor/filesystem/embeddedfs/archive/security_regression_test.go new file mode 100644 index 000000000..c2126cb91 --- /dev/null +++ b/extractor/filesystem/embeddedfs/archive/security_regression_test.go @@ -0,0 +1,102 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package archive_test + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "testing" + + cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/archive" + "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common" +) + +// makeTARWithOversizedEntry returns a TAR whose single entry declares a size +// larger than common.MaxTAREntryBytes without containing actual content. +// tar.Writer.Close() returns ErrWriteTooLong here, which we intentionally ignore; +// the header bytes are already in the buffer and that's all we need to exercise +// the per-entry size check in common.TARToTempDir. +func makeTARWithOversizedEntry(t *testing.T) *bytes.Buffer { + t.Helper() + buf := &bytes.Buffer{} + tw := tar.NewWriter(buf) + hdr := &tar.Header{ + Name: "bomb.bin", + Mode: 0600, + Size: common.MaxTAREntryBytes + 1, + Typeflag: tar.TypeReg, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("tar.WriteHeader: %v", err) + } + // Intentionally omit content bytes; Close will return ErrWriteTooLong but + // the header is already serialised into buf. + _ = tw.Close() + return buf +} + +// makeTARGZWithOversizedEntry wraps makeTARWithOversizedEntry in a gzip stream. 
diff --git a/extractor/filesystem/embeddedfs/common/common.go b/extractor/filesystem/embeddedfs/common/common.go
index 2d5f473f4..53c9cccac 100644
--- a/extractor/filesystem/embeddedfs/common/common.go
+++ b/extractor/filesystem/embeddedfs/common/common.go
@@ -658,6 +658,11 @@ func (fi *fileInfo) Sys() any {
 	return nil
 }
 
+// MaxTAREntryBytes caps individual TAR entry extraction.
+// io.Copy on an unchecked tar.Reader allows an attacker-controlled hdr.Size to
+// exhaust disk space. 2 GiB covers any realistic single package/image layer.
+const MaxTAREntryBytes = 2 << 30 // 2 GiB
+
 // TARToTempDir extracts a tar file into a temporary directory
 // that can be used to traverse its contents recursively.
 func TARToTempDir(reader io.Reader) (string, error) {
@@ -694,6 +699,13 @@ loop:
 				break loop
 			}
 		case tar.TypeReg:
+			// Reject entries that declare more than the per-entry cap.
+			// Without this check an attacker-crafted TAR header can direct
+			// io.Copy to write an arbitrarily large file to disk.
+			if hdr.Size > MaxTAREntryBytes {
+				extractErr = fmt.Errorf("TAR entry %q size %d exceeds safety limit %d", hdr.Name, hdr.Size, MaxTAREntryBytes)
+				break loop
+			}
 			dir := filepath.Dir(target)
 			if err := os.MkdirAll(dir, 0755); err != nil {
 				extractErr = fmt.Errorf("failed to create directory %s: %w", dir, err)
 				break loop
 			}
@@ -704,12 +716,19 @@
 				extractErr = fmt.Errorf("failed to create file %s: %w", target, err)
 				break loop
 			}
-			if _, err := io.Copy(outFile, tr); err != nil {
-				outFile.Close()
+			// LimitReader is a second defensive layer: even if hdr.Size is 0
+			// (sparse entry) the copy is bounded to MaxTAREntryBytes+1 so that
+			// reads beyond the cap return an error rather than filling the disk.
+			n, err := io.Copy(outFile, io.LimitReader(tr, MaxTAREntryBytes+1))
+			outFile.Close()
+			if err != nil {
 				extractErr = fmt.Errorf("failed to copy file %s: %w", target, err)
 				break loop
 			}
-			outFile.Close()
+			if n > MaxTAREntryBytes {
+				extractErr = fmt.Errorf("TAR entry %q exceeded extraction limit %d bytes", hdr.Name, MaxTAREntryBytes)
+				break loop
+			}
 		default:
 			// Skip other types (symlinks, etc.) for now
 		}
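
The copy guard in TARToTempDir follows a general pattern worth naming: limit
the read to cap+1 bytes, then compare the actual byte count against the cap.
A standalone sketch (hypothetical copyCapped helper, not repository code):

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

// copyCapped copies at most limit bytes and reports an error if the source
// held more. Reading limit+1 bytes is what distinguishes "exactly at the
// limit" from "over the limit" without trusting any declared size.
func copyCapped(dst io.Writer, src io.Reader, limit int64) (int64, error) {
	n, err := io.Copy(dst, io.LimitReader(src, limit+1))
	if err != nil {
		return n, err
	}
	if n > limit {
		return n, fmt.Errorf("input exceeded %d-byte limit", limit)
	}
	return n, nil
}

func main() {
	var out bytes.Buffer
	// An 11-byte input against a 10-byte cap: rejected after writing at
	// most 11 bytes, regardless of what a TAR header claimed.
	if _, err := copyCapped(&out, strings.NewReader("hello world"), 10); err != nil {
		fmt.Println(err)
	}
}

This is why the patch checks both hdr.Size up front (failing before the file
is even created) and the copied count afterwards (failing even when the
header understates the entry's real size).
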