diff --git a/BUILD.bit b/BUILD.bit index c0b990f..c5ac885 100644 --- a/BUILD.bit +++ b/BUILD.bit @@ -7,6 +7,8 @@ let git_commit = exec('git rev-parse HEAD') let arch = ["amd64", "arm64"] let command = ["cachew", "cachewd"] +# Format all Go files. +pre fmt = go.fmt {} # Build client and server binaries cachew = go.exe { @@ -29,7 +31,7 @@ test = go.test { # Lint lint = go.lint {} -cross-compile[arch, command] = go.exe { +cross[arch, command] = go.exe { package = "./cmd/${command}" output = "dist/${command}-linux-${arch}" cgo = false @@ -42,7 +44,7 @@ image = docker.image { tag = "cachew:latest" context = "." dockerfile = "docker/Dockerfile" - depends_on = [cross-compile] + depends_on = [cross[arm64, cachew], cross[arm64, cachewd]] } # Build production image. @@ -50,7 +52,7 @@ multi-arch-image = docker.image { tag = "${registry}/cachew:latest" context = "." dockerfile = "docker/Dockerfile" - depends_on = [cross-compile] + depends_on = [cross] platform = arch | prefix("linux/") } diff --git a/bin/.bit-0.4.0.pkg b/bin/.bit-0.14.1.pkg similarity index 100% rename from bin/.bit-0.4.0.pkg rename to bin/.bit-0.14.1.pkg diff --git a/bin/bit b/bin/bit index 0da382e..59020d7 120000 --- a/bin/bit +++ b/bin/bit @@ -1 +1 @@ -.bit-0.4.0.pkg \ No newline at end of file +.bit-0.14.1.pkg \ No newline at end of file diff --git a/client/client_test.go b/client/client_test.go index 322f5c0..567a18e 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -234,7 +234,7 @@ func TestStatsReturned(t *testing.T) { assert.Equal(t, want, got) } -func TestSnapshotRoundTrip(t *testing.T) { +func TestSaveRestoreRoundTrip(t *testing.T) { srv := newFakeServer(nil) defer srv.Close() @@ -248,10 +248,12 @@ func TestSnapshotRoundTrip(t *testing.T) { assert.NoError(t, os.WriteFile(filepath.Join(src, "sub", "b.txt"), []byte("bravo"), 0o644)) key := client.NewKey("snapshot") - assert.NoError(t, c.Snapshot(ctx, key, src, client.SnapshotOptions{})) + assert.NoError(t, c.Save(ctx, key, src, []string{"."})) dst := filepath.Join(t.TempDir(), "out") - assert.NoError(t, c.Restore(ctx, key, dst, client.RestoreOptions{})) + hit, err := c.Restore(ctx, key, dst) + assert.NoError(t, err) + assert.True(t, hit) a, err := os.ReadFile(filepath.Join(dst, "a.txt")) assert.NoError(t, err) diff --git a/client/files.go b/client/files.go new file mode 100644 index 0000000..501589e --- /dev/null +++ b/client/files.go @@ -0,0 +1,202 @@ +package client + +import ( + "context" + "crypto/sha256" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "slices" + "time" + + "github.com/alecthomas/errors" + "github.com/bmatcuk/doublestar/v4" +) + +// SaveOption configures Save. +type SaveOption interface{ applySave(*saveConfig) } + +// RestoreOption configures Restore. +type RestoreOption interface{ applyRestore(*restoreConfig) } + +// SaveRestoreOption configures both Save and Restore. +type SaveRestoreOption interface { + SaveOption + RestoreOption +} + +type saveConfig struct { + ttl time.Duration + exclude []string + extraHeaders http.Header + zstdThreads int +} + +type restoreConfig struct { + zstdThreads int +} + +type ttlOpt time.Duration + +func (o ttlOpt) applySave(c *saveConfig) { c.ttl = time.Duration(o) } + +// WithTTL sets the TTL on the uploaded object. Zero (the default) uses the +// server default. +func WithTTL(d time.Duration) SaveOption { return ttlOpt(d) } + +type excludeOpt []string + +func (o excludeOpt) applySave(c *saveConfig) { c.exclude = append(c.exclude, o...) } + +// WithExclude adds tar --exclude patterns applied during Save. +func WithExclude(patterns ...string) SaveOption { return excludeOpt(patterns) } + +type extraHeadersOpt http.Header + +func (o extraHeadersOpt) applySave(c *saveConfig) { + if c.extraHeaders == nil { + c.extraHeaders = make(http.Header) + } + for k, values := range o { + for _, v := range values { + c.extraHeaders.Add(k, v) + } + } +} + +// WithExtraHeaders merges additional headers into the upload request. +func WithExtraHeaders(h http.Header) SaveOption { return extraHeadersOpt(h) } + +type zstdThreadsOpt int + +func (o zstdThreadsOpt) applySave(c *saveConfig) { c.zstdThreads = int(o) } +func (o zstdThreadsOpt) applyRestore(c *restoreConfig) { c.zstdThreads = int(o) } + +// WithZstdThreads sets zstd parallelism. Zero (the default) uses all CPU +// cores. +func WithZstdThreads(n int) SaveRestoreOption { return zstdThreadsOpt(n) } + +// Save archives the given paths within baseDir and uploads the tar+zstd +// stream under key. Any existing object at key is overwritten. +func (c *Client) Save(ctx context.Context, key Key, baseDir string, paths []string, opts ...SaveOption) error { + var cfg saveConfig + for _, opt := range opts { + opt.applySave(&cfg) + } + + headers := make(http.Header) + headers.Set("Content-Type", "application/zstd") + headers.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filepath.Base(baseDir)+".tar.zst")) + for k, values := range cfg.extraHeaders { + for _, v := range values { + headers.Set(k, v) + } + } + + wc, err := c.Create(ctx, key, headers, cfg.ttl) + if err != nil { + return errors.Wrap(err, "failed to create object") + } + if err := Archive(ctx, wc, baseDir, paths, cfg.exclude, cfg.zstdThreads); err != nil { + return errors.Join(err, wc.Close()) + } + return errors.Wrap(wc.Close(), "failed to close writer") +} + +// Restore downloads the archive stored under key and extracts it into +// baseDir. Returns (false, nil) on cache miss so callers can populate +// baseDir and then Save. +func (c *Client) Restore(ctx context.Context, key Key, baseDir string, opts ...RestoreOption) (bool, error) { + var cfg restoreConfig + for _, opt := range opts { + opt.applyRestore(&cfg) + } + + rc, _, err := c.Open(ctx, key) + if errors.Is(err, os.ErrNotExist) { + return false, nil + } + if err != nil { + return false, errors.Wrap(err, "failed to open object") + } + defer rc.Close() //nolint:errcheck + + if err := Extract(ctx, rc, baseDir, cfg.zstdThreads); err != nil { + return false, errors.WithStack(err) + } + return true, nil +} + +// HashFiles returns a Key derived from the contents of all regular files +// matched by the given glob patterns. Patterns use doublestar syntax, so ** +// matches any number of path segments (e.g. "**/go.sum"). Matches are +// deduplicated and sorted by path, and each file's path and contents are +// folded into the digest so that content or path changes invalidate the key. +// Directories and non-regular matches are skipped. Returns an error if no +// regular files match any pattern, to avoid silently producing a constant +// key on typos. +func HashFiles(patterns ...string) (Key, error) { + if len(patterns) == 0 { + return Key{}, errors.New("at least one pattern is required") + } + seen := make(map[string]struct{}) + var paths []string + for _, pattern := range patterns { + matches, err := doublestar.FilepathGlob(pattern) + if err != nil { + return Key{}, errors.Wrapf(err, "invalid pattern %q", pattern) + } + for _, match := range matches { + match = filepath.Clean(match) + info, err := os.Lstat(match) + if err != nil { + return Key{}, errors.Wrapf(err, "failed to stat %q", match) + } + if !info.Mode().IsRegular() { + continue + } + if _, ok := seen[match]; ok { + continue + } + seen[match] = struct{}{} + paths = append(paths, match) + } + } + if len(paths) == 0 { + return Key{}, errors.Errorf("no regular files matched patterns %v", patterns) + } + slices.Sort(paths) + + h := sha256.New() + for _, path := range paths { + if err := hashFile(h, path); err != nil { + return Key{}, err + } + } + var key Key + copy(key[:], h.Sum(nil)) + return key, nil +} + +func hashFile(h io.Writer, path string) error { + if _, err := h.Write([]byte(path)); err != nil { + return errors.Wrap(err, "failed to hash path") + } + if _, err := h.Write([]byte{0}); err != nil { + return errors.Wrap(err, "failed to hash separator") + } + f, err := os.Open(path) + if err != nil { + return errors.Wrapf(err, "failed to open %q", path) + } + defer f.Close() //nolint:errcheck + if _, err := io.Copy(h, f); err != nil { + return errors.Wrapf(err, "failed to hash %q", path) + } + if _, err := h.Write([]byte{0}); err != nil { + return errors.Wrap(err, "failed to hash separator") + } + return nil +} diff --git a/client/files_test.go b/client/files_test.go new file mode 100644 index 0000000..5774cd5 --- /dev/null +++ b/client/files_test.go @@ -0,0 +1,159 @@ +package client_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/alecthomas/assert/v2" + + "github.com/block/cachew/client" +) + +func TestHashFilesDeterministic(t *testing.T) { + dir := t.TempDir() + a := filepath.Join(dir, "a") + b := filepath.Join(dir, "b") + assert.NoError(t, os.WriteFile(a, []byte("alpha"), 0o644)) + assert.NoError(t, os.WriteFile(b, []byte("bravo"), 0o644)) + + h1, err := client.HashFiles(a, b) + assert.NoError(t, err) + h2, err := client.HashFiles(b, a) + assert.NoError(t, err) + assert.Equal(t, h1, h2, "hash should be independent of argument order") + + h3, err := client.HashFiles(filepath.Join(dir, "*")) + assert.NoError(t, err) + assert.Equal(t, h1, h3, "glob and explicit patterns should hash identically") +} + +func TestHashFilesInvalidation(t *testing.T) { + baseline := func(t *testing.T) (string, client.Key) { + t.Helper() + dir := t.TempDir() + assert.NoError(t, os.WriteFile(filepath.Join(dir, "a"), []byte("v1"), 0o644)) + h, err := client.HashFiles(filepath.Join(dir, "a")) + assert.NoError(t, err) + return dir, h + } + + t.Run("ContentChangeInvalidates", func(t *testing.T) { + dir, h1 := baseline(t) + assert.NoError(t, os.WriteFile(filepath.Join(dir, "a"), []byte("v2"), 0o644)) + h2, err := client.HashFiles(filepath.Join(dir, "a")) + assert.NoError(t, err) + assert.NotEqual(t, h1, h2) + }) + + t.Run("AddedFileInvalidates", func(t *testing.T) { + dir, h1 := baseline(t) + assert.NoError(t, os.WriteFile(filepath.Join(dir, "b"), []byte("extra"), 0o644)) + h2, err := client.HashFiles(filepath.Join(dir, "*")) + assert.NoError(t, err) + assert.NotEqual(t, h1, h2) + }) + + t.Run("RenameInvalidates", func(t *testing.T) { + dir := t.TempDir() + a := filepath.Join(dir, "a") + renamed := filepath.Join(dir, "a2") + assert.NoError(t, os.WriteFile(a, []byte("same"), 0o644)) + h1, err := client.HashFiles(a) + assert.NoError(t, err) + assert.NoError(t, os.Rename(a, renamed)) + h2, err := client.HashFiles(renamed) + assert.NoError(t, err) + assert.NotEqual(t, h1, h2, "identical contents under a different path should hash differently") + }) +} + +func TestHashFilesErrors(t *testing.T) { + tests := []struct { + name string + patterns []string + }{ + {name: "NoPatterns", patterns: nil}, + {name: "NoMatches", patterns: []string{filepath.Join(t.TempDir(), "missing-*")}}, + {name: "DirectoryOnly", patterns: []string{t.TempDir()}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := client.HashFiles(tt.patterns...) + assert.Error(t, err) + }) + } +} + +func TestHashFilesDoubleStar(t *testing.T) { + dir := t.TempDir() + assert.NoError(t, os.MkdirAll(filepath.Join(dir, "a", "b"), 0o755)) + assert.NoError(t, os.WriteFile(filepath.Join(dir, "go.sum"), []byte("root"), 0o644)) + assert.NoError(t, os.WriteFile(filepath.Join(dir, "a", "go.sum"), []byte("one"), 0o644)) + assert.NoError(t, os.WriteFile(filepath.Join(dir, "a", "b", "go.sum"), []byte("two"), 0o644)) + assert.NoError(t, os.WriteFile(filepath.Join(dir, "a", "other.txt"), []byte("ignore"), 0o644)) + + h1, err := client.HashFiles(filepath.Join(dir, "**", "go.sum")) + assert.NoError(t, err) + + h2, err := client.HashFiles( + filepath.Join(dir, "go.sum"), + filepath.Join(dir, "a", "go.sum"), + filepath.Join(dir, "a", "b", "go.sum"), + ) + assert.NoError(t, err) + + assert.Equal(t, h1, h2, "** should match all nested files matching the suffix") +} + +func TestHashFilesSkipsDirectories(t *testing.T) { + dir := t.TempDir() + assert.NoError(t, os.Mkdir(filepath.Join(dir, "sub"), 0o755)) + assert.NoError(t, os.WriteFile(filepath.Join(dir, "a"), []byte("x"), 0o644)) + + h1, err := client.HashFiles(filepath.Join(dir, "*")) + assert.NoError(t, err) + + h2, err := client.HashFiles(filepath.Join(dir, "a")) + assert.NoError(t, err) + + assert.Equal(t, h1, h2, "directories should be skipped, not cause errors") +} + +func TestHashKeySaveRestore(t *testing.T) { + srv := newFakeServer(nil) + defer srv.Close() + + c := client.New(srv.URL, nil).Namespace("files") + defer c.Close() + ctx := t.Context() + + src := t.TempDir() + assert.NoError(t, os.WriteFile(filepath.Join(src, "go.sum"), []byte("sumv1"), 0o644)) + assert.NoError(t, os.MkdirAll(filepath.Join(src, "sub"), 0o755)) + assert.NoError(t, os.WriteFile(filepath.Join(src, "hello.txt"), []byte("world"), 0o644)) + assert.NoError(t, os.WriteFile(filepath.Join(src, "sub", "deep.txt"), []byte("deep"), 0o644)) + + key, err := client.HashFiles(filepath.Join(src, "go.sum")) + assert.NoError(t, err) + + dst := filepath.Join(t.TempDir(), "restore") + + hit, err := c.Restore(ctx, key, dst) + assert.NoError(t, err) + assert.False(t, hit, "fresh key should miss") + + assert.NoError(t, c.Save(ctx, key, src, []string{"hello.txt", "sub"})) + + hit, err = c.Restore(ctx, key, dst) + assert.NoError(t, err) + assert.True(t, hit, "saved key should hit") + + got, err := os.ReadFile(filepath.Join(dst, "hello.txt")) + assert.NoError(t, err) + assert.Equal(t, "world", string(got)) + + deep, err := os.ReadFile(filepath.Join(dst, "sub", "deep.txt")) + assert.NoError(t, err) + assert.Equal(t, "deep", string(deep)) +} diff --git a/client/snapshot.go b/client/snapshot.go deleted file mode 100644 index 6d24189..0000000 --- a/client/snapshot.go +++ /dev/null @@ -1,71 +0,0 @@ -package client - -import ( - "context" - "fmt" - "net/http" - "path/filepath" - "time" - - "github.com/alecthomas/errors" -) - -// SnapshotOptions control how an archive is created and uploaded. -type SnapshotOptions struct { - // TTL for the uploaded object. Zero uses the server default. - TTL time.Duration - // Exclude patterns (tar --exclude syntax). - Exclude []string - // ZstdThreads controls zstd parallelism; 0 uses all CPU cores. - ZstdThreads int - // ExtraHeaders are merged into the upload headers alongside Content-Type - // and Content-Disposition. - ExtraHeaders http.Header -} - -// RestoreOptions control how an archive is downloaded and extracted. -type RestoreOptions struct { - // ZstdThreads controls zstd parallelism; 0 uses all CPU cores. - ZstdThreads int -} - -// Snapshot archives a directory and uploads the tar+zstd stream under the -// given key. -func (c *Client) Snapshot(ctx context.Context, key Key, directory string, opts SnapshotOptions) error { - return c.SnapshotPaths(ctx, key, directory, filepath.Base(directory), []string{"."}, opts) -} - -// SnapshotPaths archives named paths within baseDir and uploads the tar+zstd -// stream under the given key. archiveName is used to set the upload's -// Content-Disposition filename. -func (c *Client) SnapshotPaths(ctx context.Context, key Key, baseDir, archiveName string, includePaths []string, opts SnapshotOptions) error { - headers := make(http.Header) - headers.Set("Content-Type", "application/zstd") - headers.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", archiveName+".tar.zst")) - for k, values := range opts.ExtraHeaders { - for _, v := range values { - headers.Set(k, v) - } - } - - wc, err := c.Create(ctx, key, headers, opts.TTL) - if err != nil { - return errors.Wrap(err, "failed to create object") - } - - if err := Archive(ctx, wc, baseDir, includePaths, opts.Exclude, opts.ZstdThreads); err != nil { - return errors.Join(err, wc.Close()) - } - return errors.Wrap(wc.Close(), "failed to close writer") -} - -// Restore downloads an archive by key and extracts it into directory. -func (c *Client) Restore(ctx context.Context, key Key, directory string, opts RestoreOptions) error { - rc, _, err := c.Open(ctx, key) - if err != nil { - return errors.Wrap(err, "failed to open object") - } - defer rc.Close() - - return errors.WithStack(Extract(ctx, rc, directory, opts.ZstdThreads)) -} diff --git a/cmd/cachew/main.go b/cmd/cachew/main.go index 3737bc0..94fefa3 100644 --- a/cmd/cachew/main.go +++ b/cmd/cachew/main.go @@ -175,12 +175,12 @@ type SnapshotCmd struct { func (c *SnapshotCmd) Run(ctx context.Context, api *client.Client) error { fmt.Fprintf(os.Stderr, "Archiving %s...\n", c.Directory) //nolint:forbidigo - opts := client.SnapshotOptions{ - TTL: c.TTL, - Exclude: c.Exclude, - ZstdThreads: c.ZstdThreads, - } - if err := api.Namespace(c.Namespace).Snapshot(ctx, c.Key.Key(), c.Directory, opts); err != nil { + err := api.Namespace(c.Namespace).Save(ctx, c.Key.Key(), c.Directory, []string{"."}, + client.WithTTL(c.TTL), + client.WithExclude(c.Exclude...), + client.WithZstdThreads(c.ZstdThreads), + ) + if err != nil { return errors.Wrap(err, "failed to create snapshot") } @@ -197,10 +197,14 @@ type RestoreCmd struct { func (c *RestoreCmd) Run(ctx context.Context, api *client.Client) error { fmt.Fprintf(os.Stderr, "Restoring to %s...\n", c.Directory) //nolint:forbidigo - opts := client.RestoreOptions{ZstdThreads: c.ZstdThreads} - if err := api.Namespace(c.Namespace).Restore(ctx, c.Key.Key(), c.Directory, opts); err != nil { + hit, err := api.Namespace(c.Namespace).Restore(ctx, c.Key.Key(), c.Directory, + client.WithZstdThreads(c.ZstdThreads)) + if err != nil { return errors.Wrap(err, "failed to restore snapshot") } + if !hit { + return errors.Errorf("cache miss: %s", c.Key.String()) + } fmt.Fprintf(os.Stderr, "Snapshot restored: %s\n", c.Key.String()) //nolint:forbidigo return nil diff --git a/go.mod b/go.mod index 295f8a6..ac72325 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.25.5 require ( github.com/alecthomas/hcl/v2 v2.6.0 github.com/alecthomas/kong v1.15.0 + github.com/bmatcuk/doublestar/v4 v4.10.0 github.com/golang-jwt/jwt/v5 v5.3.1 github.com/goproxy/goproxy v0.26.0 github.com/lmittmann/tint v1.1.3 diff --git a/go.sum b/go.sum index 11eeef3..2e5df31 100644 --- a/go.sum +++ b/go.sum @@ -22,6 +22,8 @@ github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bmatcuk/doublestar/v4 v4.10.0 h1:zU9WiOla1YA122oLM6i4EXvGW62DvKZVxIe6TYWexEs= +github.com/bmatcuk/doublestar/v4 v4.10.0/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/bytecodealliance/wasmtime-go/v39 v39.0.1 h1:RibaT47yiyCRxMOj/l2cvL8cWiWBSqDXHyqsa9sGcCE= github.com/bytecodealliance/wasmtime-go/v39 v39.0.1/go.mod h1:miR4NYIEBXeDNamZIzpskhJ0z/p8al+lwMWylQ/ZJb4= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=