Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion extractor/filesystem/embeddedfs/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"errors"
"fmt"
"io"
"strings"
"sync"

Expand All @@ -34,6 +35,12 @@ import (
const (
	// Name is the unique identifier for the archive extractor.
	Name = "embeddedfs/archive"

	// maxGzipDecompressedBytes caps total decompressed output for .tar.gz files.
	// maxFileSizeBytes only checks the compressed size; without this cap a
	// decompression bomb (e.g. 1 MB → 100 GB) exhausts disk space.
	// 8 GiB covers the largest realistic OCI image layer.
	// NOTE(review): the cap is enforced via io.LimitReader, which truncates
	// silently rather than erroring — presumably the truncated TAR then fails
	// parsing downstream; confirm an over-cap archive is reported as an error.
	maxGzipDecompressedBytes = 8 << 30 // 8 GiB
)

// Extractor implements the filesystem.Extractor interface for archive extraction.
Expand Down Expand Up @@ -106,7 +113,9 @@ func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (i
if err != nil {
return inventory.Inventory{}, fmt.Errorf("gzip.NewReader(%q): %w", input.Path, err)
}
tempDir, err = common.TARToTempDir(reader)
// LimitReader caps total decompressed bytes: maxFileSizeBytes checks only
// the compressed size, so a small .tar.gz can expand to exhaust disk space.
tempDir, err = common.TARToTempDir(io.LimitReader(reader, maxGzipDecompressedBytes))
if err != nil {
return inventory.Inventory{}, fmt.Errorf("common.TARToTempDir(%q): %w", input.Path, err)
}
Expand Down
102 changes: 102 additions & 0 deletions extractor/filesystem/embeddedfs/archive/security_regression_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package archive_test

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"strings"
	"testing"

	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
	"github.com/google/osv-scalibr/extractor/filesystem"
	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/archive"
	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common"
)

// makeTARWithOversizedEntry returns a TAR whose single entry declares a size
// larger than common.MaxTAREntryBytes without containing actual content.
// tar.Writer.Close() returns ErrWriteTooLong here, which we intentionally ignore;
// the header bytes are already in the buffer and that's all we need to exercise
// the per-entry size check in common.TARToTempDir.
// makeTARWithOversizedEntry builds an in-memory TAR archive holding a single
// regular-file entry whose header declares common.MaxTAREntryBytes+1 bytes of
// content while the archive carries no payload at all.
// tw.Close() reports ErrWriteTooLong because nothing was written for the
// declared size; that error is deliberately discarded — the serialised header
// already sits in the buffer, which is all the per-entry size check in
// common.TARToTempDir needs to see.
func makeTARWithOversizedEntry(t *testing.T) *bytes.Buffer {
	t.Helper()
	var out bytes.Buffer
	w := tar.NewWriter(&out)
	err := w.WriteHeader(&tar.Header{
		Name:     "bomb.bin",
		Mode:     0600,
		Size:     common.MaxTAREntryBytes + 1,
		Typeflag: tar.TypeReg,
	})
	if err != nil {
		t.Fatalf("tar.WriteHeader: %v", err)
	}
	// No payload follows on purpose; Close's ErrWriteTooLong is expected and
	// irrelevant — the header bytes are already in out.
	_ = w.Close()
	return &out
}

// makeTARGZWithOversizedEntry wraps makeTARWithOversizedEntry in a gzip stream.
// makeTARGZWithOversizedEntry gzip-compresses the oversized-entry TAR produced
// by makeTARWithOversizedEntry and returns the compressed bytes.
func makeTARGZWithOversizedEntry(t *testing.T) *bytes.Buffer {
	t.Helper()
	var out bytes.Buffer
	zw := gzip.NewWriter(&out)
	if _, err := zw.Write(makeTARWithOversizedEntry(t).Bytes()); err != nil {
		t.Fatalf("gzip.Write: %v", err)
	}
	if err := zw.Close(); err != nil {
		t.Fatalf("gzip.Close: %v", err)
	}
	return &out
}

// TestTAREntryOverLimitRejected verifies that a .tar file whose entry declares a
// size exceeding common.MaxTAREntryBytes is rejected before any disk write.
// TestTAREntryOverLimitRejected verifies that a .tar file whose entry declares a
// size exceeding common.MaxTAREntryBytes is rejected before any disk write.
// The error text is asserted as well: the crafted archive has a header but no
// payload, so even without the size guard extraction fails (unexpected EOF
// during the copy), and a bare nil-check would pass vacuously.
func TestTAREntryOverLimitRejected(t *testing.T) {
	e, err := archive.New(&cpb.PluginConfig{})
	if err != nil {
		t.Fatalf("archive.New(): %v", err)
	}
	input := &filesystem.ScanInput{
		Path:   "test.tar",
		Reader: makeTARWithOversizedEntry(t),
	}
	_, err = e.Extract(t.Context(), input)
	if err == nil {
		t.Fatal("Extract with oversized TAR entry should return an error, got nil")
	}
	// Must be the per-entry size guard firing, not an unrelated extraction
	// failure; matches the "exceeds safety limit" message in common.TARToTempDir.
	if !strings.Contains(err.Error(), "exceeds safety limit") {
		t.Errorf("Extract error = %q, want it to mention the per-entry safety limit", err)
	}
}

// TestGzippedTAREntryOverLimitRejected verifies that a .tar.gz file whose
// decompressed TAR contains an entry with a declared size exceeding
// common.MaxTAREntryBytes is rejected before any disk write.
// This exercises both the gzip decompression path and the per-entry size guard.
// TestGzippedTAREntryOverLimitRejected verifies that a .tar.gz file whose
// decompressed TAR contains an entry with a declared size exceeding
// common.MaxTAREntryBytes is rejected before any disk write.
// This exercises both the gzip decompression path and the per-entry size guard.
// The error text is asserted as well: the crafted archive has a header but no
// payload, so even without the size guard extraction fails (unexpected EOF
// during the copy), and a bare nil-check would pass vacuously.
func TestGzippedTAREntryOverLimitRejected(t *testing.T) {
	e, err := archive.New(&cpb.PluginConfig{})
	if err != nil {
		t.Fatalf("archive.New(): %v", err)
	}
	input := &filesystem.ScanInput{
		Path:   "test.tar.gz",
		Reader: makeTARGZWithOversizedEntry(t),
	}
	_, err = e.Extract(t.Context(), input)
	if err == nil {
		t.Fatal("Extract with oversized .tar.gz entry should return an error, got nil")
	}
	// Must be the per-entry size guard firing, not an unrelated extraction
	// failure; matches the "exceeds safety limit" message in common.TARToTempDir.
	if !strings.Contains(err.Error(), "exceeds safety limit") {
		t.Errorf("Extract error = %q, want it to mention the per-entry safety limit", err)
	}
}
25 changes: 22 additions & 3 deletions extractor/filesystem/embeddedfs/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,11 @@ func (fi *fileInfo) Sys() any {
return nil
}

// MaxTAREntryBytes caps individual TAR entry extraction.
// io.Copy on an unchecked tar.Reader allows an attacker-controlled hdr.Size to
// exhaust disk space. 2 GiB covers any realistic single package/image layer.
const MaxTAREntryBytes = 2 << 30 // 2 GiB

// TARToTempDir extracts a tar file into a temporary directory
// that can be used to traverse its contents recursively.
func TARToTempDir(reader io.Reader) (string, error) {
Expand Down Expand Up @@ -694,6 +699,13 @@ loop:
break loop
}
case tar.TypeReg:
// Reject entries that declare more than the per-entry cap.
// Without this check an attacker-crafted TAR header can direct
// io.Copy to write an arbitrarily large file to disk.
if hdr.Size > MaxTAREntryBytes {
extractErr = fmt.Errorf("TAR entry %q size %d exceeds safety limit %d", hdr.Name, hdr.Size, MaxTAREntryBytes)
break loop
}
dir := filepath.Dir(target)
if err := os.MkdirAll(dir, 0755); err != nil {
extractErr = fmt.Errorf("failed to create directory %s: %w", dir, err)
Expand All @@ -704,12 +716,19 @@ loop:
extractErr = fmt.Errorf("failed to create file %s: %w", target, err)
break loop
}
if _, err := io.Copy(outFile, tr); err != nil {
outFile.Close()
// LimitReader is a second defensive layer: even if hdr.Size is 0
// (sparse entry) the copy is bounded to MaxTAREntryBytes+1 so that
// reads beyond the cap return an error rather than filling the disk.
n, err := io.Copy(outFile, io.LimitReader(tr, MaxTAREntryBytes+1))
outFile.Close()
if err != nil {
extractErr = fmt.Errorf("failed to copy file %s: %w", target, err)
break loop
}
outFile.Close()
if n > MaxTAREntryBytes {
extractErr = fmt.Errorf("TAR entry %q exceeded extraction limit %d bytes", hdr.Name, MaxTAREntryBytes)
break loop
}
default:
// Skip other types (symlinks, etc.) for now
}
Expand Down
20 changes: 20 additions & 0 deletions extractor/filesystem/embeddedfs/qcow2/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,26 @@ func parseHeader(reader io.Reader) (*header, []headerExtension, error) {
if h.CompressionType != 0 {
return nil, nil, fmt.Errorf("unsupported compression type: %d (only zlib supported)", h.CompressionType)
}
// QCOW2 spec: ClusterBits must be 9–21 (512 B – 2 MB clusters).
// Unchecked ClusterBits lets an attacker trigger make([]byte, 1<<ClusterBits)
// with values up to 1<<63, causing immediate OOM.
if h.ClusterBits < 9 || h.ClusterBits > 21 {
return nil, nil, fmt.Errorf("ClusterBits %d out of valid range [9, 21]", h.ClusterBits)
}
// Cap L1Size: prevents make([]uint64, L1Size) OOM with large values.
// A 2M-entry L1 table covers ≥64 TiB even at the minimum cluster size.
const maxL1Size = 2 << 20 // 2,097,152
if h.L1Size > maxL1Size {
return nil, nil, fmt.Errorf("L1Size %d exceeds safety limit %d", h.L1Size, maxL1Size)
}
// Cap RefcountTableClusters: readRefcountTable computes
// header.RefcountTableClusters*uint32(clusterSize) in uint32, so large
// values can produce near-4 GB allocations even with valid ClusterBits.
// 64 clusters is generous for any real image (typical images use 1–3).
const maxRefcountTableClusters = 64
if h.RefcountTableClusters > maxRefcountTableClusters {
return nil, nil, fmt.Errorf("RefcountTableClusters %d exceeds safety limit %d", h.RefcountTableClusters, maxRefcountTableClusters)
}

if h.Version >= 3 && h.HeaderLength > 112 {
if _, err := reader.(io.Seeker).Seek(int64(h.HeaderLength), io.SeekStart); err != nil {
Expand Down
104 changes: 104 additions & 0 deletions extractor/filesystem/embeddedfs/qcow2/security_regression_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package qcow2

import (
"encoding/binary"
"os"
"runtime"
"testing"
)

// craftMaliciousQCOW2 builds a minimal 116-byte QCOW2 file whose ClusterBits
// field is set to clusterBits, causing clusterSize = 1 << clusterBits.
// The file passes all parseHeader checks and reaches readL2Table where
// make([]byte, clusterSize) is called with no bounds check.
func craftMaliciousQCOW2(t *testing.T, clusterBits uint32) string {
t.Helper()

// QCOW2 header is 112 bytes, big-endian.
// Offsets match the header struct fields in format.go.
hdr := make([]byte, 112)
be := binary.BigEndian

be.PutUint32(hdr[0:], 0x514649FB) // Magic
be.PutUint32(hdr[4:], 3) // Version = 3
be.PutUint32(hdr[20:], clusterBits) // ClusterBits
be.PutUint64(hdr[24:], 1<<clusterBits) // Size = 1 cluster (loop runs once)
be.PutUint32(hdr[32:], 0) // CryptMethod = 0
be.PutUint32(hdr[36:], 1) // L1Size = 1
be.PutUint64(hdr[40:], 0) // L1TableOffset = 0 (points to file start)
be.PutUint64(hdr[48:], 0) // RefcountTableOffset = 0
be.PutUint32(hdr[56:], 0) // RefcountTableClusters = 0
be.PutUint32(hdr[100:], 112) // HeaderLength = 112
// CompressionType (hdr[104]) = 0
// IncompatibleFeatures (hdr[72:80]) = 0

// 4-byte extension end marker terminates the extension loop.
payload := make([]byte, len(hdr)+4)
copy(payload, hdr)

f, err := os.CreateTemp(t.TempDir(), "scalibr-qcow2-bomb-*.qcow2")
if err != nil {
t.Fatalf("CreateTemp: %v", err)
}
if _, err := f.Write(payload); err != nil {
f.Close()
t.Fatalf("Write: %v", err)
}
f.Close()
return f.Name()
}

// TestConvertQCOW2ClusterBitsRejected is the security regression test.
// A crafted QCOW2 with ClusterBits=33 (8 GB cluster) must be rejected
// immediately with 0 MB TotalAlloc delta.
// Without the fix, clusterSize = 1<<33 = 8 GB is passed to make() inside
// readL2Table with no bounds check, causing an 8 GB heap allocation from a
// 116-byte input file (confirmed live: "runtime: out of memory").
// TestConvertQCOW2ClusterBitsRejected is the security regression test.
// A crafted QCOW2 with ClusterBits=33 (8 GB cluster) must be rejected
// immediately with a negligible TotalAlloc delta.
// Without the fix, clusterSize = 1<<33 = 8 GB is passed to make() inside
// readL2Table with no bounds check, causing an 8 GB heap allocation from a
// 116-byte input file (confirmed live: "runtime: out of memory").
func TestConvertQCOW2ClusterBitsRejected(t *testing.T) {
	const clusterBits = 33 // 1<<33 = 8 GB

	inputPath := craftMaliciousQCOW2(t, clusterBits)

	outFile, err := os.CreateTemp(t.TempDir(), "scalibr-qcow2-out-*.raw")
	if err != nil {
		t.Fatalf("CreateTemp output: %v", err)
	}
	outFile.Close()
	outputPath := outFile.Name()

	// Log the real on-disk size rather than a hard-coded constant, so the
	// message stays accurate if craftMaliciousQCOW2 ever changes shape.
	if fi, statErr := os.Stat(inputPath); statErr == nil {
		t.Logf("Malicious QCOW2: %d bytes on disk", fi.Size())
	}
	t.Logf("ClusterBits: %d => clusterSize = %d bytes (~%d MB)",
		clusterBits, uint64(1)<<clusterBits, (uint64(1)<<clusterBits)>>20)

	var msBefore, msAfter runtime.MemStats
	runtime.ReadMemStats(&msBefore)

	convertErr := convertQCOW2ToRaw(inputPath, outputPath, "")

	runtime.ReadMemStats(&msAfter)
	allocMB := (msAfter.TotalAlloc - msBefore.TotalAlloc) >> 20

	t.Logf("convertQCOW2ToRaw err: %v", convertErr)
	t.Logf("TotalAlloc delta: %d MB", allocMB)

	if convertErr == nil {
		t.Errorf("expected error for malicious ClusterBits=%d, got nil", clusterBits)
	}
	if allocMB > 50 {
		t.Errorf("TotalAlloc delta %d MB exceeds 50 MB — bounds check is missing or ineffective", allocMB)
	}
}
Loading