From 46d374ef8eff476d5941e525f2192b6043d3bc85 Mon Sep 17 00:00:00 2001
From: adilburaksen
Date: Mon, 20 Apr 2026 22:02:42 +0300
Subject: [PATCH 1/3] security: validate QCOW2 ClusterBits in parseHeader to
 prevent OOM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ClusterBits is read directly from the untrusted header and used as a
shift exponent: clusterSize = uint64(1) << header.ClusterBits. This
value is then passed to make() and bytes.Repeat() without any bounds
check. A 116-byte crafted .qcow2 file with ClusterBits=33 causes
scalibr to attempt an 8 GB heap allocation inside readL2Table, crashing
the process with "runtime: out of memory".

Fix: reject ClusterBits outside the QCOW2 specification range [9, 21]
(512 B – 2 MB clusters) in parseHeader, before any clusterSize is
computed.

After fix: the same 116-byte input returns an error immediately with a
0 MB TotalAlloc delta.

Adds security regression test TestConvertQCOW2ClusterBitsRejected.

Co-Authored-By: Claude Sonnet 4.6 (1M context)
---
 .../filesystem/embeddedfs/qcow2/format.go     |   6 +
 .../qcow2/security_regression_test.go         | 109 ++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 extractor/filesystem/embeddedfs/qcow2/security_regression_test.go

diff --git a/extractor/filesystem/embeddedfs/qcow2/format.go b/extractor/filesystem/embeddedfs/qcow2/format.go
index 104fd120c..675d679e2 100644
--- a/extractor/filesystem/embeddedfs/qcow2/format.go
+++ b/extractor/filesystem/embeddedfs/qcow2/format.go
@@ -155,6 +155,12 @@ func parseHeader(reader io.Reader) (*header, []headerExtension, error) {
 	if h.CompressionType != 0 {
 		return nil, nil, fmt.Errorf("unsupported compression type: %d (only zlib supported)", h.CompressionType)
 	}
+	// QCOW2 spec: ClusterBits must be 9–21 (512 B – 2 MB clusters).
+	// An unchecked field lets an attacker trigger make([]byte, 1<<ClusterBits)
+	// OOM in readL2Table.
+	if h.ClusterBits < 9 || h.ClusterBits > 21 {
+		return nil, nil, fmt.Errorf("ClusterBits %d out of valid range [9, 21]", h.ClusterBits)
+	}

 	if h.Version >= 3 && h.HeaderLength > 112 {
 		if _, err := reader.(io.Seeker).Seek(int64(h.HeaderLength), io.SeekStart); err != nil {
diff --git a/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
new file mode 100644
index 000000000..3f0eb9255
--- /dev/null
+++ b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
@@ -0,0 +1,109 @@
+// Copyright 2026 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package qcow2
+
+import (
+	"encoding/binary"
+	"os"
+	"runtime"
+	"testing"
+)
+
+// craftMaliciousQCOW2 builds a minimal 116-byte QCOW2 file whose ClusterBits
+// field is set to clusterBits, causing clusterSize = 1 << clusterBits.
+// The file passes all parseHeader checks and reaches readL2Table where
+// make([]byte, clusterSize) is called with no bounds check.
+func craftMaliciousQCOW2(t *testing.T, clusterBits uint32) string {
+	t.Helper()
+
+	// QCOW2 header is 112 bytes, big-endian.
+	// Offsets match the header struct fields in format.go.
+	hdr := make([]byte, 112)
+	be := binary.BigEndian
+
+	be.PutUint32(hdr[0:], 0x514649FB) // Magic
+	be.PutUint32(hdr[4:], 3)          // Version = 3
+	// hdr[8:16]  backing_file_offset = 0
+	// hdr[16:20] backing_file_size = 0
+	be.PutUint32(hdr[20:], clusterBits)    // ClusterBits
+	be.PutUint64(hdr[24:], 1<<clusterBits) // Size: one cluster of virtual disk
+	be.PutUint32(hdr[36:], 1)              // L1Size: a single entry
+	be.PutUint64(hdr[40:], 112)            // L1TableOffset: directly after the header
+	be.PutUint32(hdr[96:], 4)              // RefcountOrder
+	be.PutUint32(hdr[100:], 112)           // HeaderLength
+
+	// Append half an L1 entry so the file reaches readL2Table;
+	// total file size is 112 + 4 = 116 bytes.
+	data := append(hdr, 0x00, 0x00, 0x00, 0x00)
+
+	path := t.TempDir() + "/malicious.qcow2"
+	if err := os.WriteFile(path, data, 0600); err != nil {
+		t.Fatalf("os.WriteFile: %v", err)
+	}
+	return path
+}
+
+func TestConvertQCOW2ClusterBitsRejected(t *testing.T) {
+	const clusterBits = 33
+	inputPath := craftMaliciousQCOW2(t, clusterBits)
+	outputPath := t.TempDir() + "/out.raw"
+	t.Logf("ClusterBits: %d => clusterSize = %d bytes (~%d MB)", clusterBits, uint64(1)<<clusterBits, (uint64(1)<<clusterBits)>>20)
+
+	var msBefore, msAfter runtime.MemStats
+	runtime.ReadMemStats(&msBefore)
+
+	convertErr := convertQCOW2ToRaw(inputPath, outputPath, "")
+
+	runtime.ReadMemStats(&msAfter)
+	allocMB := (msAfter.TotalAlloc - msBefore.TotalAlloc) >> 20
+
+	t.Logf("convertQCOW2ToRaw err: %v", convertErr)
+	t.Logf("TotalAlloc delta: %d MB", allocMB)
+
+	if convertErr == nil {
+		t.Errorf("expected error for malicious ClusterBits=%d, got nil", clusterBits)
+	}
+	if allocMB > 50 {
+		t.Errorf("TotalAlloc delta %d MB exceeds 50 MB — bounds check is missing or ineffective", allocMB)
+	}
+}
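
To see why the [9, 21] range matters, here is a minimal standalone sketch
(hypothetical validateClusterBits helper, not code from the repository) of
how an attacker-controlled shift exponent becomes a multi-gigabyte
allocation request, and how a bounds check like the one in this patch caps
it:

package main

import "fmt"

// validateClusterBits mirrors the parseHeader guard: 2^9 = 512 B up to
// 2^21 = 2 MB clusters, the range allowed by the QCOW2 specification.
func validateClusterBits(clusterBits uint32) (uint64, error) {
	if clusterBits < 9 || clusterBits > 21 {
		return 0, fmt.Errorf("ClusterBits %d out of valid range [9, 21]", clusterBits)
	}
	return 1 << clusterBits, nil
}

func main() {
	for _, bits := range []uint32{16, 33} {
		size, err := validateClusterBits(bits)
		if err != nil {
			// Without the guard, ClusterBits=33 would reach
			// make([]byte, 1<<33): an 8 GB allocation.
			fmt.Println("rejected:", err)
			continue
		}
		fmt.Printf("ClusterBits=%d -> clusterSize=%d bytes\n", bits, size)
	}
}

Because the guard runs before clusterSize is ever computed, the worst an
attacker can force is a 2 MB cluster buffer.
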
From 36260d72a444ca3276b2c69dbb796e2b97d1ed5f Mon Sep 17 00:00:00 2001
From: adilburaksen
Date: Mon, 20 Apr 2026 22:12:08 +0300
Subject: [PATCH 2/3] security: fix QCOW2 header field bounds checks to
 prevent OOM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three header fields are used as allocation sizes without validation:

1. ClusterBits — used as shift exponent: 1<<ClusterBits (bounded by the
   previous commit).
2. L1Size — passed to make([]uint64, L1Size).
3. RefcountTableClusters — multiplied by clusterSize in uint32 inside
   readRefcountTable, so large values produce near-4 GB allocations
   even when ClusterBits is valid.

Fix: cap L1Size at 2^21 entries and RefcountTableClusters at 64
clusters in parseHeader, and rework the crafted header in the security
regression test to stay within the new caps.

Co-Authored-By: Claude Sonnet 4.6
---
 .../filesystem/embeddedfs/qcow2/format.go     | 18 ++++-
 .../qcow2/security_regression_test.go         | 47 +++++++++----------
 2 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/extractor/filesystem/embeddedfs/qcow2/format.go b/extractor/filesystem/embeddedfs/qcow2/format.go
index 675d679e2..3c4081cf9 100644
--- a/extractor/filesystem/embeddedfs/qcow2/format.go
+++ b/extractor/filesystem/embeddedfs/qcow2/format.go
@@ -156,11 +156,25 @@ func parseHeader(reader io.Reader) (*header, []headerExtension, error) {
 		return nil, nil, fmt.Errorf("unsupported compression type: %d (only zlib supported)", h.CompressionType)
 	}
 	// QCOW2 spec: ClusterBits must be 9–21 (512 B – 2 MB clusters).
-	// An unchecked field lets an attacker trigger make([]byte, 1<<ClusterBits)
-	// OOM in readL2Table.
+	// An unchecked field lets an attacker trigger make([]byte, 1<<ClusterBits)
+	// OOM in readL2Table (e.g. ClusterBits=33 → 8 GB allocation).
 	if h.ClusterBits < 9 || h.ClusterBits > 21 {
 		return nil, nil, fmt.Errorf("ClusterBits %d out of valid range [9, 21]", h.ClusterBits)
 	}
+	// Cap L1Size: prevents make([]uint64, L1Size) OOM with large values.
+	// A 2M-entry L1 table covers ≥64 TiB even at the minimum cluster size.
+	const maxL1Size = 2 << 20 // 2,097,152
+	if h.L1Size > maxL1Size {
+		return nil, nil, fmt.Errorf("L1Size %d exceeds safety limit %d", h.L1Size, maxL1Size)
+	}
+	// Cap RefcountTableClusters: readRefcountTable computes
+	// header.RefcountTableClusters*uint32(clusterSize) in uint32, so large
+	// values can produce near-4 GB allocations even with valid ClusterBits.
+	// 64 clusters is generous for any real image (typical images use 1–3).
+	const maxRefcountTableClusters = 64
+	if h.RefcountTableClusters > maxRefcountTableClusters {
+		return nil, nil, fmt.Errorf("RefcountTableClusters %d exceeds safety limit %d", h.RefcountTableClusters, maxRefcountTableClusters)
+	}

 	if h.Version >= 3 && h.HeaderLength > 112 {
 		if _, err := reader.(io.Seeker).Seek(int64(h.HeaderLength), io.SeekStart); err != nil {
diff --git a/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
index 3f0eb9255..de5007b39 100644
--- a/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
+++ b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
@@ -33,33 +33,29 @@ func craftMaliciousQCOW2(t *testing.T, clusterBits uint32) string {
 	hdr := make([]byte, 112)
 	be := binary.BigEndian
 
-	be.PutUint32(hdr[0:], 0x514649FB) // Magic
-	be.PutUint32(hdr[4:], 3)          // Version = 3
-	// hdr[8:16]  backing_file_offset = 0
-	// hdr[16:20] backing_file_size = 0
-	be.PutUint32(hdr[20:], clusterBits)    // ClusterBits
-	be.PutUint64(hdr[24:], 1<<clusterBits) // Size: one cluster of virtual disk
-	be.PutUint32(hdr[36:], 1)              // L1Size: a single entry
-	be.PutUint64(hdr[40:], 112)            // L1TableOffset: directly after the header
-	be.PutUint32(hdr[96:], 4)              // RefcountOrder
-	be.PutUint32(hdr[100:], 112)           // HeaderLength
-
-	// Append half an L1 entry so the file reaches readL2Table;
-	// total file size is 112 + 4 = 116 bytes.
-	data := append(hdr, 0x00, 0x00, 0x00, 0x00)
-
+	be.PutUint32(hdr[0:], 0x514649FB)      // Magic
+	be.PutUint32(hdr[4:], 3)               // Version = 3
+	be.PutUint32(hdr[20:], clusterBits)    // ClusterBits
+	be.PutUint64(hdr[24:], 1<<clusterBits) // Size: one cluster of virtual disk
+	be.PutUint32(hdr[36:], 1)              // L1Size: within the new cap
+	be.PutUint64(hdr[40:], 112)            // L1TableOffset: directly after the header
+	be.PutUint32(hdr[56:], 1)              // RefcountTableClusters: within the new cap
+	be.PutUint32(hdr[96:], 4)              // RefcountOrder
+	be.PutUint32(hdr[100:], 112)           // HeaderLength
+	data := append(hdr, 0x00, 0x00, 0x00, 0x00) // half an L1 entry: 116 bytes total
 	path := t.TempDir() + "/malicious.qcow2"
 	if err := os.WriteFile(path, data, 0600); err != nil {
 		t.Fatalf("os.WriteFile: %v", err)
 	}
 	return path
 }
 
 func TestConvertQCOW2ClusterBitsRejected(t *testing.T) {
 	const clusterBits = 33
 	inputPath := craftMaliciousQCOW2(t, clusterBits)
 	outputPath := t.TempDir() + "/out.raw"
-	t.Logf("ClusterBits: %d => clusterSize = %d bytes (~%d MB)", clusterBits, uint64(1)<<clusterBits, (uint64(1)<<clusterBits)>>20)
+	t.Logf("ClusterBits: %d => clusterSize = %d bytes (~%d MB)",
+		clusterBits, uint64(1)<<clusterBits, (uint64(1)<<clusterBits)>>20)
 
 	var msBefore, msAfter runtime.MemStats
 	runtime.ReadMemStats(&msBefore)
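
The RefcountTableClusters cap is about 32-bit arithmetic rather than the
spec. A short standalone sketch (assumed values, not repository code)
showing how the uint32 product in readRefcountTable behaves near and past
4 GiB, which is what the limit of 64 forecloses:

package main

import "fmt"

func main() {
	const clusterSize uint32 = 1 << 21 // 2 MB, the largest valid cluster size

	for _, clusters := range []uint32{3, 64, 2047, 2048} {
		// Same uint32 arithmetic as readRefcountTable's table-size computation.
		size := clusters * clusterSize
		fmt.Printf("RefcountTableClusters=%-4d -> %10d bytes\n", clusters, size)
	}
	// 2047 clusters is a ~4 GiB allocation request that still "fits" in
	// uint32; 2048 * 2 MB is exactly 2^32 and wraps to 0, silently
	// truncating the table read. The cap of 64 rejects both long before
	// any allocation.
}
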
From 555b26b629797a1b7fef262b23f758951fceb676 Mon Sep 17 00:00:00 2001
From: adilburaksen
Date: Sun, 26 Apr 2026 16:16:29 +0300
Subject: [PATCH 3/3] security: cap TAR entry size and gzip decompression in
 archive/OVA parsers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

common.TARToTempDir had no per-entry size limit: a malicious TAR could
declare an arbitrarily large file (hdr.Size), causing io.Copy to
exhaust disk space. The archive extractor's .tar.gz path also lacked a
total decompression cap, allowing a compression bomb (e.g. 1 MB →
100 GB) to exhaust disk on the scanner host.

- extractor/filesystem/embeddedfs/common/common.go
  - Export MaxTAREntryBytes = 2 GiB constant.
  - Check hdr.Size against MaxTAREntryBytes before creating any file.
  - Wrap io.Copy with io.LimitReader(MaxTAREntryBytes+1) and a
    post-copy size check as defence-in-depth.
- extractor/filesystem/embeddedfs/archive/archive.go
  - Add maxGzipDecompressedBytes = 8 GiB constant.
  - Wrap gzip.Reader with io.LimitReader before passing to
    TARToTempDir, capping total decompressed output for .tar.gz files.
- extractor/filesystem/embeddedfs/archive/security_regression_test.go
  - TestTAREntryOverLimitRejected: .tar with hdr.Size > MaxTAREntryBytes.
  - TestGzippedTAREntryOverLimitRejected: .tar.gz with the same
    oversized entry.

The OVA extractor calls TARToTempDir directly and is protected by the
MaxTAREntryBytes guard without needing its own changes.

Co-Authored-By: Claude Sonnet 4.6
---
 .../filesystem/embeddedfs/archive/archive.go  |  11 +-
 .../archive/security_regression_test.go       | 102 ++++++++++++++++++
 .../filesystem/embeddedfs/common/common.go    |  25 ++++-
 3 files changed, 134 insertions(+), 4 deletions(-)
 create mode 100644 extractor/filesystem/embeddedfs/archive/security_regression_test.go

diff --git a/extractor/filesystem/embeddedfs/archive/archive.go b/extractor/filesystem/embeddedfs/archive/archive.go
index b6c45bdf7..fb0b75bac 100644
--- a/extractor/filesystem/embeddedfs/archive/archive.go
+++ b/extractor/filesystem/embeddedfs/archive/archive.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"io"
 	"strings"
 	"sync"
 
@@ -34,6 +35,12 @@ import (
 const (
 	// Name is the unique identifier for the archive extractor.
 	Name = "embeddedfs/archive"
+
+	// maxGzipDecompressedBytes caps total decompressed output for .tar.gz files.
+	// maxFileSizeBytes only checks the compressed size; without this cap a
+	// decompression bomb (e.g. 1 MB → 100 GB) exhausts disk space.
+	// 8 GiB covers the largest realistic OCI image layer.
+	maxGzipDecompressedBytes = 8 << 30 // 8 GiB
 )
 
 // Extractor implements the filesystem.Extractor interface for archive extraction.
@@ -106,7 +113,9 @@ func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (i
 		if err != nil {
 			return inventory.Inventory{}, fmt.Errorf("gzip.NewReader(%q): %w", input.Path, err)
 		}
-		tempDir, err = common.TARToTempDir(reader)
+		// LimitReader caps total decompressed bytes: maxFileSizeBytes checks only
+		// the compressed size, so a small .tar.gz can expand to exhaust disk space.
+		tempDir, err = common.TARToTempDir(io.LimitReader(reader, maxGzipDecompressedBytes))
 		if err != nil {
 			return inventory.Inventory{}, fmt.Errorf("common.TARToTempDir(%q): %w", input.Path, err)
 		}
Name = "embeddedfs/archive" + + // maxGzipDecompressedBytes caps total decompressed output for .tar.gz files. + // maxFileSizeBytes only checks the compressed size; without this cap a + // decompression bomb (e.g. 1 MB → 100 GB) exhausts disk space. + // 8 GiB covers the largest realistic OCI image layer. + maxGzipDecompressedBytes = 8 << 30 // 8 GiB ) // Extractor implements the filesystem.Extractor interface for archive extraction. @@ -106,7 +113,9 @@ func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (i if err != nil { return inventory.Inventory{}, fmt.Errorf("gzip.NewReader(%q): %w", input.Path, err) } - tempDir, err = common.TARToTempDir(reader) + // LimitReader caps total decompressed bytes: maxFileSizeBytes checks only + // the compressed size, so a small .tar.gz can expand to exhaust disk space. + tempDir, err = common.TARToTempDir(io.LimitReader(reader, maxGzipDecompressedBytes)) if err != nil { return inventory.Inventory{}, fmt.Errorf("common.TARToTempDir(%q): %w", input.Path, err) } diff --git a/extractor/filesystem/embeddedfs/archive/security_regression_test.go b/extractor/filesystem/embeddedfs/archive/security_regression_test.go new file mode 100644 index 000000000..c2126cb91 --- /dev/null +++ b/extractor/filesystem/embeddedfs/archive/security_regression_test.go @@ -0,0 +1,102 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package archive_test + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "testing" + + cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/archive" + "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common" +) + +// makeTARWithOversizedEntry returns a TAR whose single entry declares a size +// larger than common.MaxTAREntryBytes without containing actual content. +// tar.Writer.Close() returns ErrWriteTooLong here, which we intentionally ignore; +// the header bytes are already in the buffer and that's all we need to exercise +// the per-entry size check in common.TARToTempDir. +func makeTARWithOversizedEntry(t *testing.T) *bytes.Buffer { + t.Helper() + buf := &bytes.Buffer{} + tw := tar.NewWriter(buf) + hdr := &tar.Header{ + Name: "bomb.bin", + Mode: 0600, + Size: common.MaxTAREntryBytes + 1, + Typeflag: tar.TypeReg, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("tar.WriteHeader: %v", err) + } + // Intentionally omit content bytes; Close will return ErrWriteTooLong but + // the header is already serialised into buf. + _ = tw.Close() + return buf +} + +// makeTARGZWithOversizedEntry wraps makeTARWithOversizedEntry in a gzip stream. 
diff --git a/extractor/filesystem/embeddedfs/common/common.go b/extractor/filesystem/embeddedfs/common/common.go
index 2d5f473f4..53c9cccac 100644
--- a/extractor/filesystem/embeddedfs/common/common.go
+++ b/extractor/filesystem/embeddedfs/common/common.go
@@ -658,6 +658,11 @@ func (fi *fileInfo) Sys() any {
 	return nil
 }
 
+// MaxTAREntryBytes caps individual TAR entry extraction.
+// io.Copy on an unchecked tar.Reader allows an attacker-controlled hdr.Size to
+// exhaust disk space. 2 GiB covers any realistic single package/image layer.
+const MaxTAREntryBytes = 2 << 30 // 2 GiB
+
 // TARToTempDir extracts a tar file into a temporary directory
 // that can be used to traverse its contents recursively.
 func TARToTempDir(reader io.Reader) (string, error) {
@@ -694,6 +699,13 @@ loop:
 				break loop
 			}
 		case tar.TypeReg:
+			// Reject entries that declare more than the per-entry cap.
+			// Without this check an attacker-crafted TAR header can direct
+			// io.Copy to write an arbitrarily large file to disk.
+			if hdr.Size > MaxTAREntryBytes {
+				extractErr = fmt.Errorf("TAR entry %q size %d exceeds safety limit %d", hdr.Name, hdr.Size, MaxTAREntryBytes)
+				break loop
+			}
 			dir := filepath.Dir(target)
 			if err := os.MkdirAll(dir, 0755); err != nil {
 				extractErr = fmt.Errorf("failed to create directory %s: %w", dir, err)
 				break loop
 			}
@@ -704,12 +716,19 @@
 				extractErr = fmt.Errorf("failed to create file %s: %w", target, err)
 				break loop
 			}
-			if _, err := io.Copy(outFile, tr); err != nil {
-				outFile.Close()
+			// LimitReader is a second defensive layer: even if hdr.Size is 0
+			// (sparse entry) the copy is bounded to MaxTAREntryBytes+1 so that
+			// reads beyond the cap return an error rather than filling the disk.
+			n, err := io.Copy(outFile, io.LimitReader(tr, MaxTAREntryBytes+1))
+			outFile.Close()
+			if err != nil {
 				extractErr = fmt.Errorf("failed to copy file %s: %w", target, err)
 				break loop
 			}
-			outFile.Close()
+			if n > MaxTAREntryBytes {
+				extractErr = fmt.Errorf("TAR entry %q exceeded extraction limit %d bytes", hdr.Name, MaxTAREntryBytes)
+				break loop
+			}
 		default:
 			// Skip other types (symlinks, etc.) for now
 		}
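
The copy guard in TARToTempDir follows a general pattern worth naming: limit
the read to cap+1 bytes, then compare the actual byte count against the cap.
A standalone sketch (hypothetical copyCapped helper, not repository code):

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

// copyCapped copies at most limit bytes and reports an error if the source
// held more. Reading limit+1 bytes is what distinguishes "exactly at the
// limit" from "over the limit" without trusting any declared size.
func copyCapped(dst io.Writer, src io.Reader, limit int64) (int64, error) {
	n, err := io.Copy(dst, io.LimitReader(src, limit+1))
	if err != nil {
		return n, err
	}
	if n > limit {
		return n, fmt.Errorf("input exceeded %d-byte limit", limit)
	}
	return n, nil
}

func main() {
	var out bytes.Buffer
	// An 11-byte input against a 10-byte cap: rejected after writing at
	// most 11 bytes, regardless of what a TAR header claimed.
	if _, err := copyCapped(&out, strings.NewReader("hello world"), 10); err != nil {
		fmt.Println(err)
	}
}

This is why the patch checks both hdr.Size up front (failing before the file
is even created) and the copied count afterwards (failing even when the
header understates the entry's real size).
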