Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
60320ba
feat: search, load and validates manifests
upils Nov 26, 2025
c67c661
fix: revert unintentional change
upils Nov 26, 2025
22ae256
refactor: move things around
upils Nov 26, 2025
6cb437f
refactor: move logic to manifestutil
upils Nov 26, 2025
00825de
feat: validate rootfs with manifest
upils Nov 27, 2025
442bcc6
fix: debugging
upils Nov 27, 2025
e19f6f1
fix: improve hardlinks check
upils Nov 27, 2025
b6ecc27
feat: trying alternative approach
upils Nov 28, 2025
1406b18
feat: refine first approach
upils Dec 1, 2025
8aba035
refactor: dedicated verify.go
upils Dec 1, 2025
70b2c5e
refactor: rework verify* funcs
upils Dec 2, 2025
c8b7a39
fix: typos and inconsistencies
upils Dec 2, 2025
1181871
refactor: improve readability
upils Dec 2, 2025
31f1d3d
refactor: improve consistency
upils Dec 3, 2025
df49dfc
style: respect internal best practices
upils Dec 3, 2025
f4169b9
style: minor consistency fixes
upils Dec 4, 2025
aade454
fix: ignore size check on manifest
upils Dec 5, 2025
e0bd4a4
style: apply suggestions
upils Dec 8, 2025
2e572fb
style: apply suggestions
upils Dec 8, 2025
3c5de1c
style: apply suggestions
upils Dec 8, 2025
7eff973
style: apply suggestions
upils Dec 8, 2025
b7c7c66
style: apply suggestions
upils Dec 8, 2025
c7196e9
fix: apply suggestions
upils Dec 16, 2025
7539db7
refactor: simplify
upils Dec 19, 2025
cd22c1f
feat: intermediary representation
upils Jan 5, 2026
27d8645
refactor: simplify existing dir checking
upils Jan 5, 2026
373f3c1
refactor: regroup hardlink-related code
upils Jan 5, 2026
1414990
refactor: refine manifest checking API
upils Jan 5, 2026
2ce5b21
refactor: add obtainManifest
upils Jan 6, 2026
17162f4
refactor: simplify by removing SliceKeys function
upils Jan 6, 2026
c21ef34
style: remove redondant comment
upils Jan 13, 2026
77f5771
refactor: rework pathInfos comparison
upils Jan 14, 2026
40ef8e8
refactor: simplify FindPathsInRelease
upils Jan 14, 2026
0a25844
refactor: refine API
upils Jan 14, 2026
71c4496
refactor: simplify based on PR suggestions
upils Jan 15, 2026
4696dcc
style: tweak error messages
upils Jan 15, 2026
089109d
fix: fail if dir inconsistent with manifest
upils Jan 16, 2026
09faa6a
fix: use loaded manifest hash as reference
upils Jan 16, 2026
bd7a707
fix: raise error when path should not be hardlinked
upils Jan 19, 2026
3ea70ce
fix: collect and record reference manifest size
upils Jan 19, 2026
3efb7d2
feat: log root directory processing
upils Jan 19, 2026
a421818
refactor: more experiment around API design
upils Jan 21, 2026
e88de2d
refactor: experiment to improve API
upils Jan 21, 2026
acac421
refactor: more refining
upils Jan 22, 2026
0262cb2
refactor: CheckDir in slicer
upils Jan 22, 2026
7445fcd
refactor: add check in slicer
upils Jan 22, 2026
5fca0c5
refactor: simplify and reduce slicer API
upils Jan 22, 2026
fb6500b
ci: rerun
upils Jan 22, 2026
d7fa155
wip
upils Jan 23, 2026
43e8a71
refactor: cleaning
upils Jan 23, 2026
46d2119
fix: check manifest consistency at selection
upils Jan 23, 2026
164c68b
refactor: return a manifest from inspection
upils Jan 26, 2026
15550ad
refactor: apply PR suggestions
upils Jan 26, 2026
4ecb555
refactor: avoid iterating twice on slices
upils Jan 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions cmd/chisel/cmd_cut.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/canonical/chisel/internal/cache"
"github.com/canonical/chisel/internal/setup"
"github.com/canonical/chisel/internal/slicer"
"github.com/canonical/chisel/public/manifest"
)

var shortCutHelp = "Cut a tree with selected slices"
Expand Down Expand Up @@ -73,6 +74,21 @@ func (cmd *cmdCut) Execute(args []string) error {
}
}

mfest, err := slicer.Inspect(cmd.RootDir, release)
if err != nil {
return err
}
if mfest != nil {
mfest.IterateSlices("", func(slice *manifest.Slice) error {
sk, err := setup.ParseSliceKey(slice.Name)
if err != nil {
return err
}
sliceKeys = append(sliceKeys, sk)
return nil
})
}

selection, err := setup.Select(release, sliceKeys, cmd.Arch)
if err != nil {
return err
Expand Down
45 changes: 38 additions & 7 deletions internal/manifestutil/manifestutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,44 @@ import (

const DefaultFilename = "manifest.wall"

func collectManifests(slice *setup.Slice, collector func(path string, slice *setup.Slice)) {
for path, info := range slice.Contents {
if info.Generate == setup.GenerateManifest {
dir := strings.TrimSuffix(path, "**")
path = filepath.Join(dir, DefaultFilename)
collector(path, slice)
}
}
}

// FindPaths finds the paths marked with "generate:manifest" and
// returns a map from the manifest path to all the slices that declare it.
func FindPaths(slices []*setup.Slice) map[string][]*setup.Slice {
manifestSlices := make(map[string][]*setup.Slice)
collector := func(path string, slice *setup.Slice) {
manifestSlices[path] = append(manifestSlices[path], slice)
}
for _, slice := range slices {
for path, info := range slice.Contents {
if info.Generate == setup.GenerateManifest {
dir := strings.TrimSuffix(path, "**")
path = filepath.Join(dir, DefaultFilename)
manifestSlices[path] = append(manifestSlices[path], slice)
}
}
collectManifests(slice, collector)
}
return manifestSlices
}

// FindPathsInRelease finds all the paths marked with "generate:manifest"
// for the given release.
func FindPathsInRelease(r *setup.Release) []string {
manifestPaths := make([]string,0)
collector := func(path string, slice *setup.Slice) {
manifestPaths = append(manifestPaths, path)
}
for _, pkg := range r.Packages {
for _, slice := range pkg.Slices {
collectManifests(slice, collector)
}
}
return manifestPaths
}

type WriteOptions struct {
PackageInfo []*archive.PackageInfo
Selection []*setup.Slice
Expand Down Expand Up @@ -340,3 +362,12 @@ func Validate(mfest *manifest.Manifest) (err error) {
}
return nil
}

// CompareSchemas compares two manifest schema strings.
func CompareSchemas(va, vb string) int {
if va == manifest.Schema && va == vb {
return 0
}
return -1
}

184 changes: 184 additions & 0 deletions internal/slicer/check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package slicer

import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"syscall"

"github.com/klauspost/compress/zstd"

"github.com/canonical/chisel/internal/manifestutil"
"github.com/canonical/chisel/public/manifest"
)

type pathInfo struct {
mode string
size int64
link string
hash string
}

func unixPerm(mode fs.FileMode) (perm uint32) {
perm = uint32(mode.Perm())
if mode&fs.ModeSticky != 0 {
perm |= 0o1000
}
return perm
}

// checkRootDir checks the content of the target directory matches with
// the manifest. Files not managed by chisel are ignored.
// This function works under the assumption the manifest is valid.
func checkRootDir(mfest *manifest.Manifest, rootDir string) error {
singlePathsByFSInode := make(map[uint64]string)
fsInodeByManifestInode := make(map[uint64]uint64)
manifestInfos := make(map[string]*pathInfo)
err := mfest.IteratePaths("", func(path *manifest.Path) error {
pathHash := path.FinalSHA256
if pathHash == "" {
pathHash = path.SHA256
}
recordedPathInfo := &pathInfo{
mode: path.Mode,
size: int64(path.Size),
link: path.Link,
hash: pathHash,
}

fsInfo := &pathInfo{}
fullPath := filepath.Join(rootDir, path.Path)
info, err := os.Lstat(fullPath)
if err != nil {
return err
}
mode := info.Mode()
fsInfo.mode = fmt.Sprintf("0%o", unixPerm(mode))
ftype := mode & fs.ModeType
switch ftype {
case fs.ModeDir:
// Nothing to do.
case fs.ModeSymlink:
fsInfo.link, err = os.Readlink(fullPath)
if err != nil {
return fmt.Errorf("cannot read symlink %q: %w", fullPath, err)
}
case 0: // Regular file.
h, err := contentHash(fullPath)
if err != nil {
return fmt.Errorf("cannot compute hash for %q: %w", fullPath, err)
}
fsInfo.hash = hex.EncodeToString(h)
fsInfo.size = info.Size()
default:
return fmt.Errorf("cannot check %q: unrecognized type %s", fullPath, mode.String())
}

// Collect manifests for tailored checking later. Adjust observed hash and
// size to still compare in a generic way.
if filepath.Base(path.Path) == manifestutil.DefaultFilename && recordedPathInfo.size == 0 && recordedPathInfo.hash == "" {
mfestInfo := *fsInfo
manifestInfos[path.Path] = &mfestInfo
fsInfo.size = 0
fsInfo.hash = ""
}

if recordedPathInfo.mode != fsInfo.mode {
return fmt.Errorf("inconsistent mode at %q: recorded %v, observed %v", path.Path, recordedPathInfo.mode, fsInfo.mode)
}
if recordedPathInfo.size != fsInfo.size {
return fmt.Errorf("inconsistent size at %q: recorded %v, observed %v", path.Path, recordedPathInfo.size, fsInfo.size)
}
if recordedPathInfo.link != fsInfo.link {
return fmt.Errorf("inconsistent link at %q: recorded %v, observed %v", path.Path, recordedPathInfo.link, fsInfo.link)
}
if recordedPathInfo.hash != fsInfo.hash {
return fmt.Errorf("inconsistent hash at %q: recorded %v, observed %v", path.Path, recordedPathInfo.hash, fsInfo.hash)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normally we use got ..., expected .... But please look around the code to see other error messages and make it consistent.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used these terms at first but they felt opinionated. Since the manifest is found in the rootfs, it could be "wrong" too. But it depends on the abstraction we want to present to the user. I think it would be useful for them to be aware that the inspection relies on the manifest so they can more easily solve the problem if the inspection fails.

If we decide that the usage of the manifest is in an implementation detail then I agree that got ..., expected ... would be more consistent.

WDYT?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we decide that the usage of the manifest is in an implementation detail then I agree that got ..., expected ... would be more consistent.

It is NOT an implementation detail. The fact we use the manifest has to be transparent IMO, how else would we do it. What can the user expect if not using the manifest

}
// Check hardlink.
if ftype != fs.ModeDir {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("cannot get syscall stat info for %q", info.Name())
}
inode := stat.Ino

if path.Inode == 0 {
// This path must not be linked to any other.
singlePath, ok := singlePathsByFSInode[inode]
if ok {
return fmt.Errorf("inconsistent content at %q: recorded no hardlink, observed hardlinked to %q", path.Path, singlePath)
}
singlePathsByFSInode[inode] = path.Path
} else {
recordedInode, ok := fsInodeByManifestInode[path.Inode]
if !ok {
fsInodeByManifestInode[path.Inode] = inode
} else if recordedInode != inode {
return fmt.Errorf("inconsistent content at %q: file hardlinked to a different inode", path.Path)
}
}
}
return nil
})
if err != nil {
return err
}

// Check manifests.
// They must all be valid manifests and be consistent per schema version.
schemaManifestInfos := make(map[string]*pathInfo)
for path, info := range manifestInfos {
fullPath := filepath.Join(rootDir, path)
f, err := os.Open(fullPath)
if err != nil {
return err
}
defer f.Close()
r, err := zstd.NewReader(f)
if err != nil {
return err
}
defer r.Close()
mfest, err = manifest.Read(r)
if err != nil {
return err

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my other comment, I think we need a named error here to know if we should skip. If we find a valid format then manifest.Validate should succeed or we should return an error, so I think you captured that correctly.

}
err = manifestutil.Validate(mfest)
if err != nil {
return err
}
schema := mfest.Schema()
refInfo, ok := schemaManifestInfos[schema]
if !ok {
schemaManifestInfos[schema] = info
continue
}

if refInfo.size != info.size {
return fmt.Errorf("inconsistent manifest size for version %s at %q: recorded %v, observed %v", schema, path, refInfo.size, info.size)
}
if refInfo.hash != info.hash {
return fmt.Errorf("inconsistent manifest hash for version %s at %q: recorded %v, observed %v", schema, path, refInfo.hash, info.hash)
}
}
return nil
}

func contentHash(path string) ([]byte, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()

h := sha256.New()
if _, err := io.Copy(h, f); err != nil {
return nil, err
}
return h.Sum(nil), nil
}
92 changes: 92 additions & 0 deletions internal/slicer/slicer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ package slicer
import (
"archive/tar"
"bytes"
"encoding/hex"
"fmt"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"slices"
"sort"
Expand All @@ -21,6 +23,7 @@ import (
"github.com/canonical/chisel/internal/manifestutil"
"github.com/canonical/chisel/internal/scripts"
"github.com/canonical/chisel/internal/setup"
"github.com/canonical/chisel/public/manifest"
)

const manifestMode fs.FileMode = 0644
Expand Down Expand Up @@ -537,3 +540,92 @@ func selectPkgArchives(archives map[string]archive.Archive, selection *setup.Sel
}
return pkgArchive, nil
}

// Inspect examines and validates the targetDir. It returns, if found and valid
// the manifest representing the content in the targetDir.
func Inspect(targetDir string, release *setup.Release) (*manifest.Manifest, error) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this name and comment is too generic, let's think of something better. What about VerifyPreviousCut or VerifyCut.

One problem I see with this grouping is that when I get back (nil, nil) it is not easy for me as a user to know if it means there was a manifest but it didn't match the content, or whether there was no manifest at all. The former is obviously not what we want, if we want a manifest then we want to get it or fail. It might also be the name that is throwing me off.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this name and comment is too generic, let's think of something better. What about VerifyPreviousCut or VerifyCut.

The "cut" concept is an external abstraction, unknown to the slicer. I was looking for a name at the same level of abstraction than Run. At the beginning I also used the verify term but while discussing it with Ed I understood that it had a subtly different meaning than I thought. "Verify" is "prove the truth of", and at the abstraction level of Inspect I think it is not exactly (and not only) what Inspect does.

One problem I see with this grouping is that when I get back (nil, nil) it is not easy for me as a user to know if it means there was a manifest but it didn't match the content, or whether there was no manifest at all. The former is obviously not what we want, if we want a manifest then we want to get it or fail. It might also be the name that is throwing me off.

I agree that (nil, nil) is ambiguous and I am not super satisfied with it. However for now I could not see another way. Do you have a concrete proposition to organize the code in a way that would avoid that?

var mfest *manifest.Manifest
manifestPaths := manifestutil.FindPathsInRelease(release)
if len(manifestPaths) > 0 {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefer if len(manifestPaths) == 0 // early return rather than extra indentation.

logf("Inspecting root directory...")
var err error
mfest, err = selectValidManifest(targetDir, manifestPaths)
if err != nil {
return nil, err
}
if mfest != nil {
err = checkRootDir(mfest, targetDir)
if err != nil {
return nil, err
}
}
}
return mfest, nil
}

// selectValidManifest returns, if found, a valid manifest with the latest
// schema. Consistency with all other manifests with the same schema is verified
// so the selection is deterministic.
func selectValidManifest(targetDir string, manifestPaths []string) (*manifest.Manifest, error) {
targetDir = filepath.Clean(targetDir)
if !filepath.IsAbs(targetDir) {
dir, err := os.Getwd()
if err != nil {
return nil, fmt.Errorf("cannot obtain current directory: %w", err)
}
targetDir = filepath.Join(dir, targetDir)
}
Comment on lines +570 to +577

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are doing this every time for an internal function. If the function is internal then you can expect an absolute path and you can document it or check with IsAbs and return an error, either one of them work. If the function is public then you have to choice to make it absolute or fail. In that case I prefer the former to follow convention.


type manifestHash struct {
path string
hash string
}
var selected *manifest.Manifest
schemaManifest := make(map[string]manifestHash)
for _, mfestPath := range manifestPaths {
err := func() error {
mfestFullPath := path.Join(targetDir, mfestPath)
f, err := os.Open(mfestFullPath)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
defer f.Close()
r, err := zstd.NewReader(f)
if err != nil {
return err
}
defer r.Close()
mfest, err := manifest.Read(r)
if err != nil {
return nil

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my other other comment. Here it is even more important. And manifest.Validate has to be an error.

}
err = manifestutil.Validate(mfest)
if err != nil {
return nil
}

if selected == nil || manifestutil.CompareSchemas(mfest.Schema(), selected.Schema()) > 0 {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compare is never > 0. In my opinion, in terms of logic the code should check all manifests of the same version have to be equal. You don't have to use the content has because you have both manifests in memory and you need to return selected anyways, so you can check whether they match or not (in a separate function please).

h, err := contentHash(mfestFullPath)
if err != nil {
return fmt.Errorf("cannot compute hash for %q: %w", mfestFullPath, err)
}
mfestHash := hex.EncodeToString(h)
refMfest, ok := schemaManifest[mfest.Schema()]
if !ok {
schemaManifest[mfest.Schema()] = manifestHash{mfestPath, mfestHash}
} else if refMfest.hash != mfestHash {
return fmt.Errorf("inconsistent manifests: %q and %q", refMfest.path, mfestPath)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will likely change, but this error message doesn't state that the contents of the manifests should be equal and they are not.

}
selected = mfest
}
return nil
}()
if err != nil {
return nil, err
}
}
return selected, nil
}
Loading