diff --git a/extractor/filesystem/embeddedfs/archive/archive.go b/extractor/filesystem/embeddedfs/archive/archive.go index b6c45bdf7..fb0b75bac 100644 --- a/extractor/filesystem/embeddedfs/archive/archive.go +++ b/extractor/filesystem/embeddedfs/archive/archive.go @@ -20,6 +20,7 @@ import ( "context" "errors" "fmt" + "io" "strings" "sync" @@ -34,6 +35,12 @@ import ( const ( // Name is the unique identifier for the archive extractor. Name = "embeddedfs/archive" + + // maxGzipDecompressedBytes caps total decompressed output for .tar.gz files. + // maxFileSizeBytes only checks the compressed size; without this cap a + // decompression bomb (e.g. 1 MB → 100 GB) exhausts disk space. + // 8 GiB covers the largest realistic OCI image layer. + maxGzipDecompressedBytes = 8 << 30 // 8 GiB ) // Extractor implements the filesystem.Extractor interface for archive extraction. @@ -106,7 +113,9 @@ func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (i if err != nil { return inventory.Inventory{}, fmt.Errorf("gzip.NewReader(%q): %w", input.Path, err) } - tempDir, err = common.TARToTempDir(reader) + // LimitReader caps total decompressed bytes: maxFileSizeBytes checks only + // the compressed size, so a small .tar.gz can expand to exhaust disk space. + tempDir, err = common.TARToTempDir(io.LimitReader(reader, maxGzipDecompressedBytes)) if err != nil { return inventory.Inventory{}, fmt.Errorf("common.TARToTempDir(%q): %w", input.Path, err) } diff --git a/extractor/filesystem/embeddedfs/archive/security_regression_test.go b/extractor/filesystem/embeddedfs/archive/security_regression_test.go new file mode 100644 index 000000000..c2126cb91 --- /dev/null +++ b/extractor/filesystem/embeddedfs/archive/security_regression_test.go @@ -0,0 +1,102 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package archive_test + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "testing" + + cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/archive" + "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common" +) + +// makeTARWithOversizedEntry returns a TAR whose single entry declares a size +// larger than common.MaxTAREntryBytes without containing actual content. +// tar.Writer.Close() returns ErrWriteTooLong here, which we intentionally ignore; +// the header bytes are already in the buffer and that's all we need to exercise +// the per-entry size check in common.TARToTempDir. +func makeTARWithOversizedEntry(t *testing.T) *bytes.Buffer { + t.Helper() + buf := &bytes.Buffer{} + tw := tar.NewWriter(buf) + hdr := &tar.Header{ + Name: "bomb.bin", + Mode: 0600, + Size: common.MaxTAREntryBytes + 1, + Typeflag: tar.TypeReg, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("tar.WriteHeader: %v", err) + } + // Intentionally omit content bytes; Close will return ErrWriteTooLong but + // the header is already serialised into buf. + _ = tw.Close() + return buf +} + +// makeTARGZWithOversizedEntry wraps makeTARWithOversizedEntry in a gzip stream. 
+func makeTARGZWithOversizedEntry(t *testing.T) *bytes.Buffer { + t.Helper() + raw := makeTARWithOversizedEntry(t) + compressed := &bytes.Buffer{} + gw := gzip.NewWriter(compressed) + if _, err := gw.Write(raw.Bytes()); err != nil { + t.Fatalf("gzip.Write: %v", err) + } + if err := gw.Close(); err != nil { + t.Fatalf("gzip.Close: %v", err) + } + return compressed +} + +// TestTAREntryOverLimitRejected verifies that a .tar file whose entry declares a +// size exceeding common.MaxTAREntryBytes is rejected before any disk write. +func TestTAREntryOverLimitRejected(t *testing.T) { + e, err := archive.New(&cpb.PluginConfig{}) + if err != nil { + t.Fatalf("archive.New(): %v", err) + } + input := &filesystem.ScanInput{ + Path: "test.tar", + Reader: makeTARWithOversizedEntry(t), + } + _, err = e.Extract(t.Context(), input) + if err == nil { + t.Fatal("Extract with oversized TAR entry should return an error, got nil") + } +} + +// TestGzippedTAREntryOverLimitRejected verifies that a .tar.gz file whose +// decompressed TAR contains an entry with a declared size exceeding +// common.MaxTAREntryBytes is rejected before any disk write. +// This exercises both the gzip decompression path and the per-entry size guard. 
+func TestGzippedTAREntryOverLimitRejected(t *testing.T) { + e, err := archive.New(&cpb.PluginConfig{}) + if err != nil { + t.Fatalf("archive.New(): %v", err) + } + input := &filesystem.ScanInput{ + Path: "test.tar.gz", + Reader: makeTARGZWithOversizedEntry(t), + } + _, err = e.Extract(t.Context(), input) + if err == nil { + t.Fatal("Extract with oversized .tar.gz entry should return an error, got nil") + } +} diff --git a/extractor/filesystem/embeddedfs/common/common.go b/extractor/filesystem/embeddedfs/common/common.go index 2d5f473f4..53c9cccac 100644 --- a/extractor/filesystem/embeddedfs/common/common.go +++ b/extractor/filesystem/embeddedfs/common/common.go @@ -658,6 +658,11 @@ func (fi *fileInfo) Sys() any { return nil } +// MaxTAREntryBytes caps individual TAR entry extraction. +// io.Copy on an unchecked tar.Reader allows an attacker-controlled hdr.Size to +// exhaust disk space. 2 GiB covers any realistic single package/image layer. +const MaxTAREntryBytes = 2 << 30 // 2 GiB + // TARToTempDir extracts a tar file into a temporary directory // that can be used to traverse its contents recursively. func TARToTempDir(reader io.Reader) (string, error) { @@ -694,6 +699,13 @@ loop: break loop } case tar.TypeReg: + // Reject entries that declare more than the per-entry cap. + // Without this check an attacker-crafted TAR header can direct + // io.Copy to write an arbitrarily large file to disk. 
+ if hdr.Size > MaxTAREntryBytes { + extractErr = fmt.Errorf("TAR entry %q size %d exceeds safety limit %d", hdr.Name, hdr.Size, MaxTAREntryBytes) + break loop + } dir := filepath.Dir(target) if err := os.MkdirAll(dir, 0755); err != nil { extractErr = fmt.Errorf("failed to create directory %s: %w", dir, err) break loop } @@ -704,12 +716,19 @@ loop: extractErr = fmt.Errorf("failed to create file %s: %w", target, err) break loop } - if _, err := io.Copy(outFile, tr); err != nil { - outFile.Close() + // LimitReader is a second defensive layer: even if hdr.Size is 0 + // (sparse entry) the copy is bounded to MaxTAREntryBytes+1 so that + // reads beyond the cap return an error rather than filling the disk. + n, err := io.Copy(outFile, io.LimitReader(tr, MaxTAREntryBytes+1)) + outFile.Close() + if err != nil { extractErr = fmt.Errorf("failed to copy file %s: %w", target, err) break loop } - outFile.Close() + if n > MaxTAREntryBytes { + extractErr = fmt.Errorf("TAR entry %q exceeded extraction limit %d bytes", hdr.Name, MaxTAREntryBytes) + break loop + } default: // Skip other types (symlinks, etc.) for now } diff --git a/extractor/filesystem/embeddedfs/qcow2/format.go b/extractor/filesystem/embeddedfs/qcow2/format.go index 104fd120c..3c4081cf9 100644 --- a/extractor/filesystem/embeddedfs/qcow2/format.go +++ b/extractor/filesystem/embeddedfs/qcow2/format.go @@ -155,6 +155,26 @@ func parseHeader(reader io.Reader) (*header, []headerExtension, error) { if h.CompressionType != 0 { return nil, nil, fmt.Errorf("unsupported compression type: %d (only zlib supported)", h.CompressionType) } + // QCOW2 spec: ClusterBits must be 9–21 (512 B – 2 MB clusters). + // Unchecked ClusterBits lets an attacker trigger make([]byte, 1<<ClusterBits) allocations of attacker-controlled size (OOM). + if h.ClusterBits < 9 || h.ClusterBits > 21 { + return nil, nil, fmt.Errorf("ClusterBits %d out of valid range [9, 21]", h.ClusterBits) + } + // Cap L1Size: prevents make([]uint64, L1Size) OOM with large values. + // A 2M-entry L1 table covers ≥64 TiB even at the minimum cluster size. 
+ const maxL1Size = 2 << 20 // 2,097,152 + if h.L1Size > maxL1Size { + return nil, nil, fmt.Errorf("L1Size %d exceeds safety limit %d", h.L1Size, maxL1Size) + } + // Cap RefcountTableClusters: readRefcountTable computes + // header.RefcountTableClusters*uint32(clusterSize) in uint32, so large + // values can produce near-4 GB allocations even with valid ClusterBits. + // 64 clusters is generous for any real image (typical images use 1–3). + const maxRefcountTableClusters = 64 + if h.RefcountTableClusters > maxRefcountTableClusters { + return nil, nil, fmt.Errorf("RefcountTableClusters %d exceeds safety limit %d", h.RefcountTableClusters, maxRefcountTableClusters) + } if h.Version >= 3 && h.HeaderLength > 112 { if _, err := reader.(io.Seeker).Seek(int64(h.HeaderLength), io.SeekStart); err != nil { diff --git a/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go new file mode 100644 index 000000000..de5007b39 --- /dev/null +++ b/extractor/filesystem/embeddedfs/qcow2/security_regression_test.go @@ -0,0 +1,104 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package qcow2 + +import ( + "encoding/binary" + "os" + "runtime" + "testing" +) + +// craftMaliciousQCOW2 builds a minimal 116-byte QCOW2 file whose ClusterBits +// field is set to clusterBits, causing clusterSize = 1 << clusterBits. 
+// The file passes all parseHeader checks and reaches readL2Table where +// make([]byte, clusterSize) is called with no bounds check. +func craftMaliciousQCOW2(t *testing.T, clusterBits uint32) string { + t.Helper() + + // QCOW2 header is 112 bytes, big-endian. + // Offsets match the header struct fields in format.go. + hdr := make([]byte, 112) + be := binary.BigEndian + + be.PutUint32(hdr[0:], 0x514649FB) // Magic + be.PutUint32(hdr[4:], 3) // Version = 3 + be.PutUint32(hdr[20:], clusterBits) // ClusterBits + be.PutUint64(hdr[24:], 1< clusterSize = %d bytes (~%d MB)", + clusterBits, uint64(1)<>20) + + var msBefore, msAfter runtime.MemStats + runtime.ReadMemStats(&msBefore) + + convertErr := convertQCOW2ToRaw(inputPath, outputPath, "") + + runtime.ReadMemStats(&msAfter) + allocMB := (msAfter.TotalAlloc - msBefore.TotalAlloc) >> 20 + + t.Logf("convertQCOW2ToRaw err: %v", convertErr) + t.Logf("TotalAlloc delta: %d MB", allocMB) + + if convertErr == nil { + t.Errorf("expected error for malicious ClusterBits=%d, got nil", clusterBits) + } + if allocMB > 50 { + t.Errorf("TotalAlloc delta %d MB exceeds 50 MB — bounds check is missing or ineffective", allocMB) + } +}