diff --git a/go.mod b/go.mod index a91796a9c..8ffbcf8ee 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.25.0 require ( github.com/Masterminds/semver v1.5.0 github.com/doug-martin/goqu/v8 v8.6.0 + github.com/ebitengine/purego v0.9.0 github.com/google/go-cmp v0.7.0 github.com/google/uuid v1.6.0 github.com/jackc/pgx/v5 v5.9.1 diff --git a/go.sum b/go.sum index 4760759ac..0324dee0e 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/doug-martin/goqu/v8 v8.6.0 h1:KWuDGL135poBgY+SceArvOtIIEpieNKgIZCvger github.com/doug-martin/goqu/v8 v8.6.0/go.mod h1:wiiYWkiguNXK5d4kGIkYmOxBScEL37d9Cfv9tXhPsTk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/ebitengine/purego v0.9.0 h1:mh0zpKBIXDceC63hpvPuGLiJ8ZAa3DfrFTudmfi8A4k= +github.com/ebitengine/purego v0.9.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= diff --git a/internal/guestfs/fs.go b/internal/guestfs/fs.go new file mode 100644 index 000000000..eb8f6952a --- /dev/null +++ b/internal/guestfs/fs.go @@ -0,0 +1,413 @@ +package guestfs + +import ( + "bytes" + "context" + "errors" + "io" + "io/fs" + "log/slog" + "path" + "runtime" + "slices" + "strings" + "sync" + "sync/atomic" + "time" + "unsafe" +) + +// FsCache groups the lookup caches kept by an [FS]. +type fsCache struct { + dirent sync.Map // map[string]*dirent + fileinfo sync.Map // map[string]*fileinfo + contents sync.Map // map[string]*guestfsFile +} + +// Clear empties all three cache maps. +func (c *fsCache) Clear() { + c.dirent.Clear() + c.fileinfo.Clear() + c.contents.Clear() +} + +// Compile-time checks that [*FS] satisfies the io/fs interfaces it claims. +var ( + _ fs.FS = (*FS)(nil) + _ fs.StatFS = (*FS)(nil) + _ fs.ReadDirFS = (*FS)(nil) + _ fs.ReadFileFS = (*FS)(nil) +) + +// FS implements [fs.FS]. 
+type FS struct { + g guestfs + closed *atomic.Bool + cache fsCache +} + +// Open mounts the filesystem image (a file containing just a filesystem, i.e. +// no partition table) and returns an [fs.FS] implementation for examining it. +// +// The returned [*FS] may panic if not closed. +func Open(ctx context.Context, path string) (*FS, error) { + sys := new(FS) + if err := errors.Join(loadLibC(), loadLib()); err != nil { + slog.DebugContext(ctx, "unable to do setup", "reason", err) + return nil, errors.ErrUnsupported + } + + g, err := newGuestfs() + if err != nil { + return nil, err + } + closed := new(atomic.Bool) + + // The cleanup closure holds an extra pointer to the "closed" bool, so it + // will outlive the "sys" pointer. An atomic probably isn't strictly + // necessary (there should only ever be two live pointers, and this one is + // only used after the one stored in "sys" is gone), but I didn't want to + // verify that. + runtime.AddCleanup(sys, func(g guestfs) { + if closed.CompareAndSwap(false, true) { + lib.Close(g) + } + }, g) + + sys.g = g + sys.closed = closed + + slog.DebugContext(ctx, "appliance launching") + if err := addDrive(sys.g, path); err != nil { + return nil, err + } + if err := launch(sys.g); err != nil { + return nil, err + } + slog.DebugContext(ctx, "appliance launched") + if err := mount(sys.g, "/dev/sda", "/"); err != nil { + return nil, err + } + slog.DebugContext(ctx, "fs mounted") + + return sys, nil +} + +// Close releases held resources. +// +// Any [fs.File]s returned by the receiver should not be used after this method +// is called. +func (sys *FS) Close() error { + // Eagerly deref pointers in the caches. + sys.cache.Clear() + if sys.closed.CompareAndSwap(false, true) { + lib.Close(sys.g) + } + return nil +} + +// ToAbs translates a name from [fs.FS] convention (always relative to the root) +// to the guestfs convention (always absolute). 
+func toAbs(name string) string { + return "/" + path.Clean(name) +} + +// All the various fs method implementation are implemented as an exported +// version that expects [fs.FS] paths and an unexported version that expects +// guestfs (absolute) paths. The convention to follow is: +// - File names are called "name". +// - Paths in structs are guestfs paths, called "path". + +// Open implements [fs.FS]. +func (sys *FS) Open(name string) (fs.File, error) { + if !fs.ValidPath(name) { + return nil, fs.ErrInvalid + } + + return sys.open(toAbs(name)) +} + +func (sys *FS) open(path string) (fs.File, error) { + stat, err := sys.stat(path) + if err != nil { + return nil, err + } + + return &file{ + sys: sys, + stat: stat, + path: path, + }, nil +} + +var ( + _ fs.File = (*file)(nil) + _ fs.ReadDirFile = (*file)(nil) + _ io.Reader = (*file)(nil) + _ io.ReaderAt = (*file)(nil) +) + +// File is the struct backing returned [fs.File]s. +// +// If [Read] is called, the file contents are pulled into memory in their +// entirety. +type file struct { + sys *FS + stat fs.FileInfo + path string + contents *guestfsFile + reader *bytes.Reader + dirents []fs.DirEntry +} + +// Close implements [fs.File]. +func (f *file) Close() error { + *f = file{} + return nil +} + +// Stat implements [fs.File]. +func (f *file) Stat() (fs.FileInfo, error) { return f.stat, nil } + +// ReadDir implements [fs.ReadDirFile]. +// +// BUG(hank) ReadDir currently does not respect the "n" argument and always +// returns the entire directory contents. +func (f *file) ReadDir(n int) ([]fs.DirEntry, error) { + if f.dirents == nil { + var err error + f.dirents, err = f.sys.readDir(f.path) + if err != nil { + return nil, err + } + } + var r []fs.DirEntry + if n <= 0 { + r, f.dirents = f.dirents, f.dirents[len(f.dirents):] + } else { + i := min(n, len(f.dirents)) + r, f.dirents = f.dirents[:i], f.dirents[i:] + } + return r, nil +} + +// Read implements [io.Reader]. 
+// +// Calling Read pulls the entire file contents into memory. +func (f *file) Read(b []byte) (int, error) { + if f.reader == nil { + name := f.path + cache := &f.sys.cache.contents + v, loaded := cache.Load(name) + if !loaded { + rd, err := readFile(f.sys.g, name) + if err != nil { + return 0, err + } + v, _ = cache.LoadOrStore(name, rd) + } + f.contents = v.(*guestfsFile) + f.reader = bytes.NewReader(f.contents.data) + } + return f.reader.Read(b) +} + +// ReadAt implements [io.ReaderAt]. +// +// BUG(hank) The underlying [guestfs_pread(3)] call used for the [io.ReaderAt] +// implementation is only more efficient (due to calling convention switch and +// buffer copies) if the data is actually being processed piece-wise and large +// buffers (e.g. 2 MiB) are used. +// +// [guestfs_pread(3)]: https://libguestfs.org/guestfs.3.html#guestfs_pread +func (f *file) ReadAt(b []byte, offset int64) (int, error) { + if f.reader == nil { + return pread(f.sys.g, f.path, b, offset) + } + return f.reader.ReadAt(b, offset) +} + +// Stat implements [fs.StatFS]. +func (sys *FS) Stat(name string) (fs.FileInfo, error) { + if !fs.ValidPath(name) { + return nil, fs.ErrInvalid + } + return sys.stat(toAbs(name)) +} + +func (sys *FS) stat(path string) (fs.FileInfo, error) { + v, loaded := sys.cache.fileinfo.Load(path) + if !loaded { + fi, err := statns(sys.g, path) + if err != nil { + return nil, err + } + fi.sys = sys + v, _ = sys.cache.fileinfo.LoadOrStore(path, fi) + } + return v.(*fileinfo), nil +} + +type fileinfo struct { + sys *FS + path string + statns *guestfsStatns +} + +// IsDir implements [fs.FileInfo]. +func (f *fileinfo) IsDir() bool { + //revive:disable:var-naming This breaks Go convention to mirror the C name. + const S_IFDIR = 0o040000 + //revive:enable:var-naming + return (f.statns.mode & S_IFDIR) != 0 +} + +// ModTime implements [fs.FileInfo]. 
+func (f *fileinfo) ModTime() time.Time { + return time.Unix(f.statns.mtime_sec, f.statns.mtime_nsec) +} + +// Mode implements [fs.FileInfo]. +func (f *fileinfo) Mode() fs.FileMode { + //revive:disable:var-naming These break Go convention and mirror C names. + const ( + S_IFSOCK = 0o140000 // socket + S_IFLNK = 0o120000 // symbolic link + S_IFREG = 0o100000 // regular file + S_IFBLK = 0o060000 // block device + S_IFDIR = 0o040000 // directory + S_IFCHR = 0o020000 // character device + S_IFIFO = 0o010000 // FIFO + ) + //revive:enable:var-naming + + m := f.statns.mode + fm := fs.FileMode(m & 0o7777) // Last 12 bits are the same. + // Translate the relevant Linux mode bits: + for _, b := range []struct { + Test int64 + Flag fs.FileMode + }{ + {S_IFSOCK, fs.ModeSocket}, + {S_IFLNK, fs.ModeSymlink}, + {S_IFBLK, fs.ModeDevice}, + {S_IFDIR, fs.ModeDir}, + {S_IFCHR, fs.ModeCharDevice}, + {S_IFIFO, fs.ModeNamedPipe}, + } { + if m&b.Test != 0 { + fm |= b.Flag + } + } + return fm +} + +// Name implements [fs.FileInfo]. +func (f *fileinfo) Name() string { return path.Base(f.path) } + +// Size implements [fs.FileInfo]. +func (f *fileinfo) Size() int64 { return f.statns.size } + +// Sys implements [fs.FileInfo]. +func (f *fileinfo) Sys() any { return f.statns } + +// ReadDir implements [fs.ReadDirFS]. 
+func (sys *FS) ReadDir(name string) ([]fs.DirEntry, error) { + if !fs.ValidPath(name) { + return nil, fs.ErrInvalid + } + return sys.readDir(toAbs(name)) +} + +func (sys *FS) readDir(name string) ([]fs.DirEntry, error) { + l := lib.Readdir(sys.g, name) + if l == nil { + return nil, getError(sys.g) + } + defer lib.FreeDirentList(l) + raws := unsafe.Slice(l.val, l.len) + ents := make([]dirent, len(raws)) + ret := make([]fs.DirEntry, len(raws)) + + for i := range raws { + raw := &raws[i] + ent := &ents[i] + ent.sys = sys + ent.dir = name + ent.name = toString(raw.name) + switch raw.ftyp { + case 'b': // Block special + ent.typ = fs.ModeDevice + case 'c': // Char special + ent.typ = fs.ModeCharDevice + case 'd': // Directory + ent.typ = fs.ModeDir + case 'f': // FIFO (named pipe) + ent.typ = fs.ModeNamedPipe + case 'l': // Symbolic link + ent.typ = fs.ModeSymlink + case 'r': // Regular file + ent.typ = 0 + case 's': // Socket + ent.typ = fs.ModeSocket + case 'u': // Unknown file type + ent.typ = fs.ModeIrregular + default: // aka '?': The readdir(3) call returned a d_type field with an unexpected value + ent.typ = fs.ModeIrregular + } + ret[i] = ent + } + + // TODO(hank): Cache ReadDir calls. + slices.SortFunc(ret, func(a, b fs.DirEntry) int { + return strings.Compare(a.Name(), b.Name()) + }) + return ret, nil +} + +var _ fs.DirEntry = (*dirent)(nil) + +type dirent struct { + sys *FS + dir string + name string + typ fs.FileMode +} + +// Info implements [fs.DirEntry]. +func (d *dirent) Info() (fs.FileInfo, error) { + return d.sys.stat(path.Join(d.dir, d.name)) +} + +// IsDir implements [fs.DirEntry]. +func (d *dirent) IsDir() bool { return d.typ == fs.ModeDir } + +// Name implements [fs.DirEntry]. +func (d *dirent) Name() string { return d.name } + +// Type implements [fs.DirEntry]. +func (d *dirent) Type() fs.FileMode { return d.typ } + +// ReadFile implements [fs.ReadFileFS]. 
+func (sys *FS) ReadFile(name string) ([]byte, error) { + if !fs.ValidPath(name) { + return nil, fs.ErrInvalid + } + return sys.readFile(toAbs(name)) +} + +func (sys *FS) readFile(name string) ([]byte, error) { + // If the [foreign pointer tracking proposal] makes it, then this method + // could avoid a copy and just hand out the foreign-backed slice. + // + // [foreign pointer tracking proposal]: https://github.com/golang/go/issues/70224 + v, loaded := sys.cache.contents.Load(name) + if !loaded { + rd, err := readFile(sys.g, name) + if err != nil { + return nil, err + } + v, _ = sys.cache.contents.LoadOrStore(name, rd) + } + f := v.(*guestfsFile) + b := make([]byte, len(f.data)) + copy(b, f.data) + return b, nil +} diff --git a/internal/guestfs/guestfs.go b/internal/guestfs/guestfs.go new file mode 100644 index 000000000..cb0d8cf94 --- /dev/null +++ b/internal/guestfs/guestfs.go @@ -0,0 +1,385 @@ +// Package guestfs provides functionality to inspect arbitrary disk images via +// [libguestfs(3)]. +// +// Users should call [Load] as early as possible in initialization to determine +// if needed libraries are present. +// +// [libguestfs(3)]: https://libguestfs.org/guestfs.3.html +package guestfs + +import ( + "errors" + "fmt" + "io/fs" + "iter" + "runtime" + "strings" + "structs" + "sync" + "syscall" + "unsafe" + + "github.com/ebitengine/purego" +) + +// Load attempts to dynamically load needed libraries, and reports +// [errors.ErrUnsupported] if unable to. +var Load = sync.OnceValue(doLoad) + +// DoLoad attempts to load needed libraries. +func doLoad() error { + var err loadError + err.LibC = loadLibC() + err.LibGuestFS = loadLib() + + if err.LibC == nil && err.LibGuestFS == nil { + return nil + } + return &err +} + +// LoadError is returned from [doLoad]. +type loadError struct { + LibC error + LibGuestFS error +} + +// Error implements [error]. 
+func (e *loadError) Error() string { + var b strings.Builder + multi := e.LibC != nil && e.LibGuestFS != nil + + b.WriteString("load error") + if multi { + b.WriteByte('s') + } + b.WriteString(": ") + if e.LibC != nil { + b.WriteString(e.LibC.Error()) + } + if multi { + b.WriteString(", ") + } + if e.LibGuestFS != nil { + b.WriteString(e.LibGuestFS.Error()) + } + + return b.String() +} + +// Unwrap implements [errors.Unwrap]. +func (e *loadError) Unwrap() []error { + errs := make([]error, 0, 2) + if e.LibC != nil { + errs = append(errs, e.LibC) + } + if e.LibGuestFS != nil { + errs = append(errs, e.LibGuestFS) + } + return errs +} + +// Is implements [errors.Is]. +func (e *loadError) Is(tgt error) bool { + return tgt == errors.ErrUnsupported +} + +// BUG(hank) This code assumes that Go's "int" and C's "int" are the +// same size. This is probably true, but is not guaranteed by spec. + +// BUG(hank) Some of the C functions have return-via-pointer semantics for +// communicating buffer sizes. These use a C "size_t" which this code assumes +// is 64 bits, but that's not guaranteed by spec. + +// Lib is a table of functions to call into the C guestfs library. +// +// The types are all in their "C type" reckoning. The [FS] handles these +// internally; users of this package shouldn't have to deal with the type +// nastiness. 
+var lib struct { + CreateFlags func(int) guestfs // guestfs_create_flags + Launch func(guestfs) int // guestfs_launch + Close func(guestfs) // guestfs_close + + LastError func(guestfs) string // guestfs_last_error + LastErrno func(guestfs) int // guestfs_last_errno + SetErrorHandler func(guestfs, uintptr, unsafe.Pointer) // guestfs_set_error_handler + PushErrorHandler func(guestfs, uintptr, unsafe.Pointer) // guestfs_push_error_handler + PopErrorHandler func(guestfs) // guestfs_pop_error_handler + + AddDrive func(guestfs, string) int // guestfs_add_drive + Mount func(guestfs, string, string) int // guestfs_mount + + Statns func(guestfs, string) *guestfsStatns // guestfs_statns + Readdir func(guestfs, string) *guestfsDirentList // guestfs_readdir + ReadFile func(guestfs, string, *uint64) *byte // guestfs_read_file + PRead func(guestfs, string, int, int64, *uint64) *byte // guestfs_pread + + FreeStatns func(*guestfsStatns) // guestfs_free_statns + FreeDirentList func(*guestfsDirentList) // guestfs_free_dirent_list +} + +// LoadLib reports an error if unable to dynamically load libguestfs. +// +// This should be called before attempting to use [lib]. +var loadLib = sync.OnceValue(func() error { + // BUG(hank) This package current hard-codes attempting to dynamically load + // "libguestfs.so.0". It's unclear what the correct library name/path is on + // MacOS. + handle, err := purego.Dlopen("libguestfs.so.0", purego.RTLD_NOW|purego.RTLD_GLOBAL) + if err != nil { + return fmt.Errorf("could not open libguestfs: %w", err) + } + // This handle to the library is never freed, which means the library can't + // be hot-reloaded. 
+ + for name, fptr := range map[string]any{ + `guestfs_create`: &lib.CreateFlags, + `guestfs_launch`: &lib.Launch, + `guestfs_close`: &lib.Close, + `guestfs_last_error`: &lib.LastError, + `guestfs_last_errno`: &lib.LastErrno, + `guestfs_set_error_handler`: &lib.SetErrorHandler, + `guestfs_push_error_handler`: &lib.PushErrorHandler, + `guestfs_pop_error_handler`: &lib.PopErrorHandler, + `guestfs_add_drive_ro`: &lib.AddDrive, + `guestfs_mount_ro`: &lib.Mount, + `guestfs_statns`: &lib.Statns, + `guestfs_readdir`: &lib.Readdir, + `guestfs_read_file`: &lib.ReadFile, + `guestfs_pread`: &lib.PRead, + `guestfs_free_statns`: &lib.FreeStatns, + `guestfs_free_dirent_list`: &lib.FreeDirentList, + } { + cfn, err := purego.Dlsym(handle, name) + if err != nil { + return fmt.Errorf("could not open libguestfs: %w", err) + } + purego.RegisterFunc(fptr, cfn) + } + + return nil +}) + +//revive:disable:var-naming These break Go convention and mirror C names. + +// Guestfs is a [guestfs_h]. +// +// [guestfs_h]: https://libguestfs.org/guestfs.3.html#guestfs_h +type guestfs unsafe.Pointer + +// GuestfsDirentList is a [guestfs_dirent_list]. +// +// [guestfs_dirent_list]: https://libguestfs.org/guestfs.3.html#guestfs_dirent +type guestfsDirentList struct { + structs.HostLayout + len uint32 + val *guestfsDirent +} + +// GuestfsDirent is a [guestfs_dirent]. +// +// [guestfs_dirent]: https://libguestfs.org/guestfs.3.html#guestfs_dirent +type guestfsDirent struct { + structs.HostLayout + ino int64 + ftyp byte + // Name is a C string, which isn't handled by purego in structs. + // + // See the [refString] and [toString] helpers. + name *byte +} + +// GuestfsStatns is a [guestfs_statns]. 
+// +// [guestfs_status]: https://libguestfs.org/guestfs.3.html#guestfs_statns1 +type guestfsStatns struct { + structs.HostLayout + dev int64 + ino int64 + mode int64 + nlink int64 + uid int64 + gid int64 + rdev int64 + size int64 + blksize int64 + blocks int64 + atime_sec int64 + atime_nsec int64 + mtime_sec int64 + mtime_nsec int64 + ctime_sec int64 + ctime_nsec int64 + spare [6]int64 +} + +//revive:enable:var-naming + +// NewGuestfs creates a guestfs instance via [create_flags] with NO_ENVIRONMENT +// and NO_CLOSE_ON_EXIT, then unsets the error handler. +// +// [create_flags]: https://libguestfs.org/guestfs.3.html#guestfs_create_flags +func newGuestfs() (guestfs, error) { + //revive:disable:var-naming These break Go convention and mirror C names. + const ( + NO_ENVIRONMENT = (1 << 0) + NO_CLOSE_ON_EXIT = (1 << 1) + ) + //revive:enable:var-naming + + ptr := lib.CreateFlags(NO_ENVIRONMENT | NO_CLOSE_ON_EXIT) + if ptr == nil { + return nil, errors.New("unable to create guestfs handle") + } + // Clear the default error handler, which prints to stderr. + lib.SetErrorHandler(ptr, uintptr(unsafe.Pointer(nil)), nil) + return ptr, nil +} + +// GetError returns that last error encountered by the [guestfs] instance. +// +// If the last operation did not error, this function reports nil. +// +// The concrete type is [*guesfsErr], if additional introspection is needed. +func getError(g guestfs) error { + errno := lib.LastErrno(g) + if errno == 0 { + return nil + } + return &guestfsErr{ + err: syscall.Errno(errno), + message: lib.LastError(g), + } +} + +// GuestfsErr is an error reported by a [guestfs] instance. +type guestfsErr struct { + err error + message string +} + +// Error implements [error]. +func (g *guestfsErr) Error() string { return g.message } + +// Unwrap works with [errors.Unwrap]. +func (g *guestfsErr) Unwrap() error { return g.err } + +// Call [guestfs_add_drive]. 
+// +// [guestfs_add_drive]: https://libguestfs.org/guestfs.3.html#guestfs_add_drive +func addDrive(g guestfs, path string) error { + if lib.AddDrive(g, path) != 0 { + return getError(g) + } + return nil +} + +// Call [guestfs_launch]. +// +// [guestfs_launch]: https://libguestfs.org/guestfs.3.html#guestfs_launch +func launch(g guestfs) error { + if lib.Launch(g) != 0 { + return getError(g) + } + return nil +} + +// Call [guestfs_mount]. +// +// [guestfs_mount]: https://libguestfs.org/guestfs.3.html#guestfs_mount +func mount(g guestfs, dev, path string) error { + if lib.Mount(g, dev, path) != 0 { + return getError(g) + } + return nil +} + +// Pread calls [guestfs_pread], copying the returned data into "dst". +// +// [guestfs_pread]: https://libguestfs.org/guestfs.3.html#guestfs_pread +func pread(g guestfs, name string, dst []byte, offset int64) (int, error) { + var read uint64 + ptr := lib.PRead(g, name, len(dst), offset, &read) + if ptr == nil { + return 0, getError(g) + } + defer libC.Free(unsafe.Pointer(ptr)) + src := unsafe.Slice(ptr, read) + n := copy(dst, src) + return n, nil +} + +// ReadFile calls [guestfs_read_file] and arranges for the results to be freed +// automatically. +// +// [guestfs_read_file]: https://libguestfs.org/guestfs.3.html#guestfs_read_file +func readFile(g guestfs, name string) (*guestfsFile, error) { + var sz uint64 + ptr := lib.ReadFile(g, name, &sz) + if ptr == nil { + return nil, getError(g) + } + data := unsafe.Slice(ptr, sz) + f := &guestfsFile{data} + runtime.AddCleanup(f, libC.Free, unsafe.Pointer(ptr)) + return f, nil +} + +// GuestfsFile is a wrapper to hang a cleanup off of. 
+type guestfsFile struct { + data []byte +} + +func statns(g guestfs, path string) (*fileinfo, error) { + p := lib.Statns(g, path) + if p == nil { + return nil, getError(g) + } + info := &fileinfo{ + path: path, + statns: p, + } + runtime.AddCleanup(info, lib.FreeStatns, p) + return info, nil +} + +func readdir(g guestfs, p string) (iter.Seq[dirent], error) { + l := lib.Readdir(g, p) + if l == nil { + return nil, getError(g) + } + return func(yield func(dirent) bool) { + defer lib.FreeDirentList(l) + for _, d := range unsafe.Slice(l.val, l.len) { + name := toString(d.name) + r := dirent{ + dir: p, + name: name, + } + switch d.ftyp { + case 'b': // Block special + r.typ = fs.ModeDevice + case 'c': // Char special + r.typ = fs.ModeCharDevice + case 'd': // Directory + r.typ = fs.ModeDir + case 'f': // FIFO (named pipe) + r.typ = fs.ModeNamedPipe + case 'l': // Symbolic link + r.typ = fs.ModeSymlink + case 'r': // Regular file + r.typ = 0 + case 's': // Socket + r.typ = fs.ModeSocket + case 'u': // Unknown file type + r.typ = fs.ModeIrregular + default: // aka '?': The readdir(3) call returned a d_type field with an unexpected value + r.typ = fs.ModeIrregular + } + if !yield(r) { + return + } + } + }, nil +} diff --git a/internal/guestfs/guestfs_test.go b/internal/guestfs/guestfs_test.go new file mode 100644 index 000000000..10a59a85a --- /dev/null +++ b/internal/guestfs/guestfs_test.go @@ -0,0 +1,226 @@ +package guestfs + +import ( + "bytes" + "crypto/sha256" + "encoding/json" + "errors" + "io/fs" + "os" + "os/exec" + "path/filepath" + "slices" + "strings" + "sync" + "testing" + "time" + + "github.com/quay/claircore/test" +) + +func TestError(t *testing.T) { + t.Run("Unsupported", func(t *testing.T) { + err := &loadError{ + LibC: errors.New("libc"), + LibGuestFS: errors.New("libguestfs"), + } + + t.Logf("%q == %q", err, errors.ErrUnsupported) + if !errors.Is(err, errors.ErrUnsupported) { + t.Fail() + } + }) + t.Run("Unwrap", func(t *testing.T) { + errLibC := 
errors.New("libc") + err := &loadError{ + LibC: errLibC, + LibGuestFS: errors.New("libguestfs"), + } + + t.Logf("%q == %q", err, errLibC) + if !errors.Is(err, errLibC) { + t.Fail() + } + }) + t.Run("Single", func(t *testing.T) { + err := &loadError{ + LibC: errors.New("libc"), + } + got := err.Error() + want := "load error: libc" + + t.Logf("%q == %q", got, want) + if got != want { + t.Fail() + } + }) + t.Run("Plural", func(t *testing.T) { + err := &loadError{ + LibC: errors.New("libc"), + LibGuestFS: errors.New("libguestfs"), + } + got := err.Error() + want := "load errors: libc, libguestfs" + + t.Logf("%q == %q", got, want) + if got != want { + t.Fail() + } + }) +} + +var ( + skip error + tryLoad sync.Once +) + +func load(t testing.TB) { + tryLoad.Do(func() { + skip = loadLib() + if skip != nil { + t.Log(skip) + return + } + t.Log("loaded libguestfs") + need := []string{ + "skopeo", + "mkfs.erofs", + "gzip", + } + errs := make([]error, len(need)) + for i, exe := range need { + _, errs[i] = exec.LookPath(exe) + } + skip = errors.Join(errs...) 
+ if skip != nil { + t.Log(skip) + return + } + }) + if skip != nil { + t.SkipNow() + } +} + +func makeErofsFromLayer(ref string) func(testing.TB, *os.File) { + return func(t testing.TB, out *os.File) { + if err := out.Close(); err != nil { + t.Error(err) + } + dir := t.TempDir() + t.Logf("using last layer in %q", ref) + + var outBuf bytes.Buffer + var errBuf bytes.Buffer + cmd := exec.CommandContext(t.Context(), "skopeo", "copy", "--remove-signatures", "docker://"+ref, "oci:"+dir) + cmd.Stdout = &outBuf + cmd.Stderr = &errBuf + if err := cmd.Run(); err != nil { + t.Log(err) + t.Logf("stdout:\n%s", outBuf.String()) + t.Logf("stderr:\n%s", errBuf.String()) + t.FailNow() + } + t.Log("fetched ref") + + type desc struct { + MediaType string + Digest string + } + var index struct { + Manifests []desc + } + f, err := os.Open(filepath.Join(dir, "index.json")) + if err != nil { + t.Fatal(err) + } + defer f.Close() + if err := json.NewDecoder(f).Decode(&index); err != nil { + t.Fatal(err) + } + var algo, digest string + for _, m := range index.Manifests { + if m.MediaType == "application/vnd.oci.image.manifest.v1+json" { + var ok bool + algo, digest, ok = strings.Cut(m.Digest, ":") + if ok { + break + } + } + } + if digest == "" { + t.Fatal("unable to find image manifest") + } + + var manifest struct { + Layers []desc + } + f, err = os.Open(filepath.Join(dir, "blobs", algo, digest)) + if err != nil { + t.Fatal(err) + } + defer f.Close() + if err := json.NewDecoder(f).Decode(&manifest); err != nil { + t.Fatal(err) + } + slices.Reverse(manifest.Layers) + d := manifest.Layers[0].Digest + algo, digest, ok := strings.Cut(d, ":") + if !ok { + t.Fatalf("bad digest: %q", d) + } + t.Logf("using layer: %s:%s", algo, digest) + + layer := filepath.Join(dir, "blobs", algo, digest) + + t.Logf("writing erofs to: %s", out.Name()) + cmd = exec.CommandContext(t.Context(), "mkfs.erofs", + "--tar=f", "--ungzip", "--sort=none", out.Name(), layer) + cmd.Stdout = &outBuf + cmd.Stderr = &errBuf + 
if err := cmd.Run(); err != nil { + t.Log(err) + t.Logf("stdout:\n%s", outBuf.String()) + t.Logf("stderr:\n%s", errBuf.String()) + t.FailNow() + } + t.Log("created erofs") + } +} + +func TestLoad(t *testing.T) { + load(t) + name := test.GenerateFixture(t, + "layer.erofs", + time.Time{}, + makeErofsFromLayer("registry.access.redhat.com/ubi9/httpd-24:latest")) + + ctx := t.Context() + sys, err := Open(ctx, name) + if err != nil { + t.Fatal(err) + } + defer sys.Close() + t.Log("opened erofs") + + p := `usr/sbin/httpd` + b, err := fs.ReadFile(sys, p) + if err != nil { + t.Error(err) + } + ck := sha256.Sum256(b) + t.Logf("%s: sha256:%x", p, ck) + + // BUG(hank) The directory walking doesn't work correctly. + fs.WalkDir(sys, "usr", func(p string, ent fs.DirEntry, err error) error { + info, err := ent.Info() + if err != nil { + t.Error(err) + return err + } + t.Log(info.IsDir()) + t.Logf("%q: %+#v", p, info) + t.Log(p, fs.FormatFileInfo(info)) + return nil + }) +} diff --git a/internal/guestfs/libc.go b/internal/guestfs/libc.go new file mode 100644 index 000000000..b510795a0 --- /dev/null +++ b/internal/guestfs/libc.go @@ -0,0 +1,132 @@ +package guestfs + +import ( + "fmt" + "runtime" + "sync" + "unsafe" + + "github.com/ebitengine/purego" +) + +func getSystemLibrary() string { + switch runtime.GOOS { + case "darwin": + return "/usr/lib/libSystem.B.dylib" + case "linux": + return "libc.so.6" + default: + panic(fmt.Errorf("GOOS %q is not supported", runtime.GOOS)) + } +} + +// LibC is a table of functions to call into the C standard library. +// +// Only functions used in code are opened. +var libC struct { + Free func(unsafe.Pointer) +} + +// LoadLibC reports an error if unable to dynamically load libc. +// +// This should be called before attempting to use [libC]. 
+var loadLibC = sync.OnceValue(func() error { + handle, err := purego.Dlopen(getSystemLibrary(), purego.RTLD_LAZY|purego.RTLD_GLOBAL) + if err != nil { + return fmt.Errorf("could not open libc: %w", err) + } + cfn, err := purego.Dlsym(handle, `free`) + if err != nil { + return fmt.Errorf("could not open libc: %w", err) + } + purego.RegisterFunc(&libC.Free, cfn) + return nil +}) + +// Strlen is a very simple strlen implementation. +// +// # SAFETY +// +// This is allowed by Go rules, as long as the string is properly terminated. If +// it is not, this may cause a panic. +func strlen(p unsafe.Pointer) (l int) { + //revive:disable:empty-block The side-effect of the "post" statement is used. + for ; *(*byte)(unsafe.Add(p, l)) != 0x00; l++ { + } + //revive:enable:empty-block + return l +} + +// ToString returns the pointed-to C string copied into a Go string. +func toString(charstar *byte) string { + l := strlen(unsafe.Pointer(charstar)) + src := unsafe.Slice(charstar, l) + dst := make([]byte, l) + copy(dst, src) + return string(dst) +} + +// RefString reinterprets the pointed-to C string as the backing memory for a Go +// string. +// +// # SAFETY +// +// The returned string is only valid as long as the pointed-to memory is valid. +func refString(charstar *byte) string { + return unsafe.String(charstar, strlen(unsafe.Pointer(charstar))) +} + +/* +// Return the number of non-NULL pointers at the pointed-to address. +func countPointers(ptrptr unsafe.Pointer) int { + var n uintptr + for { + p := unsafe.Pointer(uintptr(ptrptr) + n*unsafe.Sizeof(uintptr(0))) + if p == nil { + break + } + n++ + if n > 4096 { // failsafe-ish + panic("too many elements!") + } + } + return int(n) +} + +// ToStrings returns an iterator over the C memory of an array of strings. This +// function takes ownership of the memory and arranges for it to be freed when +// the iterator is exhausted. 
+func toStrings(ptrptr unsafe.Pointer) iter.Seq[string] { + return func(yeild func(string) bool) { + toFree := []unsafe.Pointer{ptrptr} + defer func() { + for _, p := range toFree { + libc.Free(p) + } + }() + + n := uintptr(0) + for { + p := unsafe.Add(ptrptr, n*unsafe.Sizeof(n)) + if p == nil { + return + } + toFree = append(toFree, p) + + // very simple strlen implementation: + s := p + for ; s != nil; s = unsafe.Add(s, 1) { + } + l := int(uintptr(s) - uintptr(p)) + + if !yeild(unsafe.String((*byte)(p), l)) { + return + } + n++ + if n > 4096 { // failsafe-ish + panic("too many elements!") + } + } + } +} +*/