From 217dd9cfd7949c6cf305c9d2c55621cc7afee579 Mon Sep 17 00:00:00 2001 From: Avi Deitcher Date: Tue, 10 Feb 2026 09:22:48 +0200 Subject: [PATCH] add copy test for ext4 Signed-off-by: Avi Deitcher --- filesystem/ext4/checksum.go | 2 - filesystem/ext4/directory.go | 33 +- filesystem/ext4/directory_test.go | 2 +- filesystem/ext4/directoryentry.go | 19 +- filesystem/ext4/ext4.go | 505 ++++++++++++++++++++--- filesystem/ext4/ext4_integration_test.go | 263 ++++++++++++ filesystem/ext4/extent.go | 225 ++++++++-- filesystem/ext4/file.go | 22 +- filesystem/ext4/inode.go | 45 +- go.mod | 2 +- go.sum | 4 - sync/copy.go | 139 +++++-- sync/copy_test.go | 5 +- 13 files changed, 1077 insertions(+), 189 deletions(-) create mode 100644 filesystem/ext4/ext4_integration_test.go diff --git a/filesystem/ext4/checksum.go b/filesystem/ext4/checksum.go index b84d2842..2e2394b3 100644 --- a/filesystem/ext4/checksum.go +++ b/filesystem/ext4/checksum.go @@ -29,8 +29,6 @@ func directoryChecksummer(seed, inodeNumber, inodeGeneration uint32) checksummer // directoryChecksumAppender returns a function that implements checksumAppender for a directory entries block // original calculations can be seen for e2fsprogs https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/ext2fs/csum.c#n301 // and in the linux tree https://github.com/torvalds/linux/blob/master/fs/ext4/namei.c#L376-L384 -// -//nolint:unparam // inodeGeneration is always 0 func directoryChecksumAppender(seed, inodeNumber, inodeGeneration uint32) checksumAppender { fn := directoryChecksummer(seed, inodeNumber, inodeGeneration) return func(b []byte) []byte { diff --git a/filesystem/ext4/directory.go b/filesystem/ext4/directory.go index 24535f02..e8cdc666 100644 --- a/filesystem/ext4/directory.go +++ b/filesystem/ext4/directory.go @@ -19,7 +19,7 @@ type Directory struct { } // toBytes convert our entries to raw bytes. Provides checksum as well. Final returned byte slice will be a multiple of bytesPerBlock. -func (d *Directory) toBytes(bytesPerBlock uint32, checksumFunc checksumAppender) []byte { +func (d *Directory) toBytes(bytesPerBlock uint32, checksumFunc checksumAppender, withChecksums bool) []byte { b := make([]byte, 0) var ( previousLength int @@ -30,26 +30,47 @@ func (d *Directory) toBytes(bytesPerBlock uint32, checksumFunc checksumAppender) if len(d.entries) == 0 { return b } + checksumSize := 0 + if withChecksums { + checksumSize = minDirEntryLength + } + blockLimit := int(bytesPerBlock) - checksumSize lastEntryCount = len(d.entries) - 1 for i, de := range d.entries { b2 := de.toBytes(0) switch { - case len(block)+len(b2) > int(bytesPerBlock)-minDirEntryLength: + case len(block)+len(b2) > blockLimit: // if adding this one will go past the end of the block, pad out the previous block = block[:len(block)-previousLength] - previousB := previousEntry.toBytes(uint16(int(bytesPerBlock) - len(block) - minDirEntryLength)) + previousB := previousEntry.toBytes(uint16(blockLimit - len(block))) block = append(block, previousB...) // add the checksum - block = checksumFunc(block) + if withChecksums { + block = checksumFunc(block) + } b = append(b, block...) // start a new block block = make([]byte, 0) + // add current entry to the new block + if i == lastEntryCount { + b2 = de.toBytes(uint16(blockLimit - len(block))) + block = append(block, b2...) + if withChecksums { + block = checksumFunc(block) + } + b = append(b, block...) + block = make([]byte, 0) + } else { + block = append(block, b2...) + } case i == lastEntryCount: // if this is the last one, pad it out - b2 = de.toBytes(uint16(int(bytesPerBlock) - len(block) - minDirEntryLength)) + b2 = de.toBytes(uint16(blockLimit - len(block))) block = append(block, b2...) // add the checksum - block = checksumFunc(block) + if withChecksums { + block = checksumFunc(block) + } b = append(b, block...) // start a new block block = make([]byte, 0) diff --git a/filesystem/ext4/directory_test.go b/filesystem/ext4/directory_test.go index 085d2052..f18b5f9e 100644 --- a/filesystem/ext4/directory_test.go +++ b/filesystem/ext4/directory_test.go @@ -16,7 +16,7 @@ func TestDirectoryToBytes(t *testing.T) { if err != nil { t.Fatal(err) } - b := dir.toBytes(bytesPerBlock, directoryChecksumAppender(sb.checksumSeed, 2, 0)) + b := dir.toBytes(bytesPerBlock, directoryChecksumAppender(sb.checksumSeed, 2, 0), true) // read the bytes from the disk diff, diffString := testhelper.DumpByteSlicesWithDiffs(b, expected, 32, false, true, true) diff --git a/filesystem/ext4/directoryentry.go b/filesystem/ext4/directoryentry.go index b4e1e649..29753989 100644 --- a/filesystem/ext4/directoryentry.go +++ b/filesystem/ext4/directoryentry.go @@ -184,19 +184,32 @@ type directoryEntryInfo struct { } func (de *directoryEntryInfo) Info() (iofs.FileInfo, error) { + mode := iofs.FileMode(0) + isDir := de.directoryEntry.fileType == dirFileTypeDirectory + if isDir { + mode |= iofs.ModeDir + } + if de.inode != nil && de.inode.fileType == fileTypeSymbolicLink { + mode |= iofs.ModeSymlink + } return &FileInfo{ modTime: de.modifyTime, name: de.filename, size: int64(de.size), - isDir: de.directoryEntry.fileType == dirFileTypeDirectory, + isDir: isDir, + mode: mode, }, nil } func (de *directoryEntryInfo) Type() iofs.FileMode { + mode := iofs.FileMode(0) if de.directoryEntry.fileType == dirFileTypeDirectory { - return iofs.ModeDir + mode |= iofs.ModeDir + } + if de.inode != nil && de.inode.fileType == fileTypeSymbolicLink { + mode |= iofs.ModeSymlink } - return 0 + return mode } func (de *directoryEntryInfo) IsDir() bool { diff --git a/filesystem/ext4/ext4.go b/filesystem/ext4/ext4.go index c34d332a..38a95ac3 100644 --- a/filesystem/ext4/ext4.go +++ b/filesystem/ext4/ext4.go @@ -115,6 +115,122 @@ type FileSystem struct { backupSuperblocks []int64 } +func (fs *FileSystem) dirChecksumAppender(inodeNumber, inodeGeneration uint32) checksumAppender { + if fs.superblock.features.metadataChecksums { + return directoryChecksumAppender(fs.superblock.checksumSeed, inodeNumber, inodeGeneration) + } + return nullDirectoryChecksummer +} + +func (fs *FileSystem) writeDirectory(parentInode *inode, dirBytes []byte) error { + blockSize := int(fs.superblock.blockSize) + if blockSize == 0 { + return fmt.Errorf("invalid block size") + } + requiredBlocks := (len(dirBytes) + blockSize - 1) / blockSize + + extents, err := parentInode.extents.blocks(fs) + if err != nil { + return fmt.Errorf("could not read parent extents for directory: %w", err) + } + if uint64(requiredBlocks) > extents.blockCount() { + newExtents, err := fs.allocateExtents(uint64(len(dirBytes)), &extents) + if err != nil { + return fmt.Errorf("could not allocate disk space for directory: %w", err) + } + combined := extents[:] + combined = append(combined, (*newExtents)...) + combined = mergeExtents(combined) + // if we need more than can fit in the inode, we have to use internal nodes. We allocate entirely new extents, + // and then remove the marking of the old ones. + if len(combined) > 4 { + freshExtents, err := fs.allocateExtents(uint64(len(dirBytes)), nil) + if err != nil { + return fmt.Errorf("could not allocate contiguous extents for directory: %w", err) + } + if len(*freshExtents) > 4 { + return fmt.Errorf("directory requires %d extents; internal nodes not supported", len(*freshExtents)) + } + if err := fs.deallocateExtents(combined); err != nil { + return fmt.Errorf("could not deallocate old extents for directory: %w", err) + } + combined = *freshExtents + } + parentInode.extents = &extentLeafNode{ + extentNodeHeader: extentNodeHeader{ + depth: 0, + entries: uint16(len(combined)), + max: 4, + blockSize: fs.superblock.blockSize, + }, + extents: combined, + } + extents = combined + } + sort.Slice(extents, func(i, j int) bool { + return extents[i].fileBlock < extents[j].fileBlock + }) + + parentInode.size = uint64(len(dirBytes)) + if parentInode.filesystemBlocks { + parentInode.blocks = uint64(requiredBlocks) + } else { + parentInode.blocks = uint64(requiredBlocks) * uint64(blockSize) / 512 + } + if err := fs.writeInode(parentInode); err != nil { + return fmt.Errorf("could not write inode for directory: %w", err) + } + + writableFile, err := fs.backend.Writable() + if err != nil { + return err + } + written := 0 + for _, e := range extents { + for i := 0; i < int(e.count); i++ { + if written >= len(dirBytes) { + return nil + } + blockStart := (e.startingBlock + uint64(i)) * uint64(blockSize) + end := written + blockSize + if end > len(dirBytes) { + end = len(dirBytes) + } + if _, err := writableFile.WriteAt(dirBytes[written:end], int64(blockStart)); err != nil { + return fmt.Errorf("could not write directory data: %w", err) + } + written = end + } + } + if written != len(dirBytes) { + return fmt.Errorf("wrote only %d bytes instead of expected %d for directory", written, len(dirBytes)) + } + return nil +} + +func mergeExtents(es extents) extents { + if len(es) < 2 { + return es + } + sort.Slice(es, func(i, j int) bool { + return es[i].fileBlock < es[j].fileBlock + }) + out := make(extents, 0, len(es)) + current := es[0] + for i := 1; i < len(es); i++ { + next := es[i] + if uint64(current.fileBlock)+uint64(current.count) == uint64(next.fileBlock) && + current.startingBlock+uint64(current.count) == next.startingBlock { + current.count += next.count + continue + } + out = append(out, current) + current = next + } + out = append(out, current) + return out +} + // Equal compare if two filesystems are equal func (fs *FileSystem) Equal(a *FileSystem) bool { localMatch := fs.backend == a.backend @@ -465,7 +581,7 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS backupSuperblockGroups := calculateBackupSuperblockGroups(blockGroups) backupSuperblocks = []int64{0} for _, bg := range backupSuperblockGroups { - backupSuperblocks = append(backupSuperblocks, bg*int64(blocksPerGroup)) + backupSuperblocks = append(backupSuperblocks, bg*int64(blocksPerGroup)+int64(firstDataBlock)) } } @@ -795,10 +911,109 @@ func (fs *FileSystem) Link(oldpath, newpath string) error { } // creates a symbolic link named linkpath which contains the string target. -// -//nolint:revive // parameters will be used eventually func (fs *FileSystem) Symlink(oldpath, newpath string) error { - return filesystem.ErrNotImplemented + if err := validatePath(newpath); err != nil { + return err + } + parentDir, entry, err := fs.getEntryAndParent(newpath) + if err != nil { + return err + } + if entry != nil { + return fmt.Errorf("target file %s already exists", newpath) + } + + inodeNumber, err := fs.allocateInode(parentDir.inode, 0) + if err != nil { + return fmt.Errorf("could not allocate inode for symlink %s: %w", newpath, err) + } + + de := directoryEntry{ + inode: inodeNumber, + filename: path.Base(newpath), + fileType: dirFileTypeSymlink, + } + parentDir.entries = append(parentDir.entries, &de) + + parentInode, err := fs.readInode(parentDir.inode) + if err != nil { + return fmt.Errorf("could not read inode %d of parent directory: %w", parentDir.inode, err) + } + parentDirBytes := parentDir.toBytes( + fs.superblock.blockSize, + fs.dirChecksumAppender(parentDir.inode, parentInode.nfsFileVersion), + fs.superblock.features.metadataChecksums, + ) + if err := fs.writeDirectory(parentInode, parentDirBytes); err != nil { + return fmt.Errorf("unable to write new directory entry: %w", err) + } + + now := time.Now() + perms := filePermissions{read: true, write: true, execute: true} + in := inode{ + number: inodeNumber, + permissionsGroup: perms, + permissionsOwner: perms, + permissionsOther: perms, + fileType: fileTypeSymbolicLink, + owner: parentInode.owner, + group: parentInode.group, + size: uint64(len(oldpath)), + hardLinks: 1, + flags: &inodeFlags{}, + nfsFileVersion: 0, + version: 0, + inodeSize: fs.superblock.inodeSize, + accessTime: now, + changeTime: now, + createTime: now, + modifyTime: now, + linkTarget: oldpath, + } + + if len(oldpath) >= 60 { + newExtents, err := fs.allocateExtents(uint64(len(oldpath)), nil) + if err != nil { + return fmt.Errorf("could not allocate disk space for symlink %s: %w", newpath, err) + } + extentTreeParsed, metaBlocks, err := extendExtentTree(nil, newExtents, fs, nil) + if err != nil { + return fmt.Errorf("could not convert extents into tree for symlink %s: %w", newpath, err) + } + extentsFSBlockCount := newExtents.blockCount() + metaBlocks + in.blocks = extentsFSBlockCount * uint64(fs.superblock.blockSize) / 512 + in.flags.usesExtents = true + in.extents = extentTreeParsed + } + + if err := fs.writeInode(&in); err != nil { + return fmt.Errorf("could not write inode for symlink %s: %w", newpath, err) + } + + if len(oldpath) >= 60 { + extents, err := in.extents.blocks(fs) + if err != nil { + return fmt.Errorf("could not read extents for symlink %s: %w", newpath, err) + } + linkFile := &File{ + inode: &in, + fileType: dirFileTypeSymlink, + filesystem: fs, + isReadWrite: true, + isAppend: true, + offset: 0, + extents: extents, + } + wrote, err := linkFile.Write([]byte(oldpath)) + if err != nil && err != io.EOF { + return fmt.Errorf("unable to write symlink target %s: %w", newpath, err) + } + if wrote != len(oldpath) { + return fmt.Errorf("wrote only %d bytes instead of expected %d for symlink target %s", wrote, len(oldpath), newpath) + } + } + + return nil } // Chtimes changes the file creation, access and modification times @@ -875,6 +1090,28 @@ func (fs *FileSystem) Chmod(name string, mode os.FileMode) error { return fs.writeInode(inode) } +// Readlink returns the target of a symbolic link. +func (fs *FileSystem) ReadLink(p string) (string, error) { + if err := validatePath(p); err != nil { + return "", err + } + _, entry, err := fs.getEntryAndParent(p) + if err != nil { + return "", err + } + if entry == nil { + return "", fmt.Errorf("target file %s does not exist", p) + } + inode, err := fs.readInode(entry.inode) + if err != nil { + return "", fmt.Errorf("could not read inode number %d: %v", entry.inode, err) + } + if inode.fileType != fileTypeSymbolicLink { + return "", fmt.Errorf("target file %s is not a symbolic link", p) + } + return inode.linkTarget, nil +} + // Chown changes the numeric uid and gid of the named file. If the file is a symbolic link, // it changes the uid and gid of the link's target. A uid or gid of -1 means to not change that value func (fs *FileSystem) Chown(name string, uid, gid int) error { @@ -938,14 +1175,14 @@ func (fs *FileSystem) ReadDir(p string) ([]iofs.DirEntry, error) { count := len(dir.entries) ret := make([]iofs.DirEntry, 0, count) for i, e := range dir.entries { + if e.inode == 0 || e.filename == "." || e.filename == ".." || e.filename == "" { + // skip these entries + continue + } in, err := fs.readInode(e.inode) if err != nil { return nil, fmt.Errorf("could not read inode %d at position %d in directory: %v", e.inode, i, err) } - if e.filename == "." || e.filename == ".." || e.filename == "" { - // skip these entries - continue - } ret = append(ret, &directoryEntryInfo{ inode: in, directoryEntry: e, @@ -1202,15 +1439,16 @@ func (fs *FileSystem) Remove(p string) error { newEntries = append(newEntries, e) } parentDir.entries = newEntries - // write the parent directory back - dirBytes := parentDir.toBytes( - fs.superblock.blockSize, - directoryChecksumAppender(fs.superblock.checksumSeed, parentDir.inode, 0), - ) parentInode, err := fs.readInode(parentDir.inode) if err != nil { return fmt.Errorf("could not read inode %d for %s: %v", entry.inode, path.Base(p), err) } + // write the parent directory back + dirBytes := parentDir.toBytes( + fs.superblock.blockSize, + fs.dirChecksumAppender(parentDir.inode, parentInode.nfsFileVersion), + fs.superblock.features.metadataChecksums, + ) extents, err = parentInode.extents.blocks(fs) if err != nil { return fmt.Errorf("could not read extents for inode %d for %s: %v", entry.inode, path.Base(p), err) @@ -1479,7 +1717,6 @@ func (fs *FileSystem) readDirectory(inodeNumber uint32) ([]*directoryEntry, erro } var dirEntries []*directoryEntry - // TODO: none of this works for hashed dir entries, indicated by in.flags.hashedDirectoryIndexes == true if in.flags.hashedDirectoryIndexes { treeRoot, err := parseDirectoryTreeRoot(b[:fs.superblock.blockSize], fs.superblock.features.largeDirectory) if err != nil { @@ -1497,7 +1734,21 @@ func (fs *FileSystem) readDirectory(inodeNumber uint32) ([]*directoryEntry, erro dirEntries, err = parseDirEntriesLinear(b, fs.superblock.features.metadataChecksums, fs.superblock.blockSize, in.number, in.nfsFileVersion, fs.superblock.checksumSeed) } - return dirEntries, err + if err != nil { + return nil, err + } + // filter out checksum entries (inode 0) and empty names + filtered := dirEntries[:0] + for _, de := range dirEntries { + if de == nil { + continue + } + if de.inode == 0 || de.filename == "" { + continue + } + filtered = append(filtered, de) + } + return filtered, nil } // readFileBytes read all of the bytes for an individual file pointed at by a given inode @@ -1552,7 +1803,7 @@ func (fs *FileSystem) readDirWithMkdir(p string, doMake bool) (*Directory, error } entries, err := fs.readDirectory(rootInode) if err != nil { - return nil, fmt.Errorf("failed to read directory %s", "/") + return nil, fmt.Errorf("failed to read directory %s: %v", "/", err) } currentDir.entries = entries for i, subp := range paths { @@ -1685,38 +1936,25 @@ func (fs *FileSystem) mkDirEntry(parent *Directory, name string, isDir bool) (*d fileType: deFileType, } parent.entries = append(parent.entries, &de) - // write the parent out to disk - bytesPerBlock := fs.superblock.blockSize - parentDirBytes := parent.toBytes(bytesPerBlock, directoryChecksumAppender(fs.superblock.checksumSeed, parent.inode, 0)) // check if parent has increased in size beyond allocated blocks parentInode, err := fs.readInode(parent.inode) if err != nil { return nil, fmt.Errorf("could not read inode %d of parent directory: %w", parent.inode, err) } - - // write the directory entry in the parent - // figure out which block it goes into, and possibly rebalance the directory entries hash tree - parentExtents, err := parentInode.extents.blocks(fs) - if err != nil { - return nil, fmt.Errorf("could not read parent extents for directory: %w", err) - } - dirFile := &File{ - inode: parentInode, - filename: name, - fileType: dirFileTypeDirectory, - filesystem: fs, - isReadWrite: true, - isAppend: true, - offset: 0, - extents: parentExtents, + if isDir { + // increment the hard link count for the parent directory, since we are adding a new entry that points to it + parentInode.hardLinks++ } - wrote, err := dirFile.Write(parentDirBytes) - if err != nil && err != io.EOF { + // write the parent out to disk + bytesPerBlock := fs.superblock.blockSize + parentDirBytes := parent.toBytes( + bytesPerBlock, + fs.dirChecksumAppender(parent.inode, parentInode.nfsFileVersion), + fs.superblock.features.metadataChecksums, + ) + if err := fs.writeDirectory(parentInode, parentDirBytes); err != nil { return nil, fmt.Errorf("unable to write new directory: %w", err) } - if wrote != len(parentDirBytes) { - return nil, fmt.Errorf("wrote only %d bytes instead of expected %d for new directory", wrote, len(parentDirBytes)) - } // normally, after getting a tree from extents, you would need to then allocate all of the blocks // in the extent tree - leafs and intermediate. However, because we are allocating a new directory @@ -1730,6 +1968,13 @@ func (fs *FileSystem) mkDirEntry(parent *Directory, name string, isDir bool) (*d return nil, fmt.Errorf("could not initialize file %s: %w", name, err) } + if isDir { + bg := blockGroupForInode(int(inodeNumber), fs.superblock.inodesPerGroup) + if err := fs.incrGDUsedDirs(bg, 1); err != nil { + return nil, fmt.Errorf("could not increment used directory count in group descriptor: %w", err) + } + } + // return return &de, nil } @@ -1750,7 +1995,7 @@ func (fs *FileSystem) initFile(inodeNumber, parentInodeNumber uint32, ft fileTyp if err != nil { return fmt.Errorf("could not allocate disk space: %w", err) } - extentTreeParsed, err = extendExtentTree(nil, newExtents, fs, nil) + extentTreeParsed, _, err = extendExtentTree(nil, newExtents, fs, nil) if err != nil { return fmt.Errorf("could not convert extents into tree: %w", err) } @@ -1819,7 +2064,11 @@ func (fs *FileSystem) initFile(inodeNumber, parentInodeNumber uint32, ft fileTyp root: false, entries: initialEntries, } - dirBytes := newDir.toBytes(fs.superblock.blockSize, directoryChecksumAppender(fs.superblock.checksumSeed, inodeNumber, 0)) + dirBytes := newDir.toBytes( + fs.superblock.blockSize, + fs.dirChecksumAppender(inodeNumber, in.nfsFileVersion), + fs.superblock.features.metadataChecksums, + ) // write the bytes out to disk dirFile := &File{ inode: &in, @@ -1857,6 +2106,7 @@ func (fs *FileSystem) allocateInode(parent uint32, requested int) (uint32, error bg int gd groupDescriptor bm *bitmap.Bitmap + err error ) switch { case requested != 0: @@ -1865,11 +2115,6 @@ func (fs *FileSystem) allocateInode(parent uint32, requested int) (uint32, error inodeNumber = 2 } - writableFile, err := fs.backend.Writable() - if err != nil { - return 0, err - } - // if a specific inode was requested, then try to get that one if inodeNumber != -1 { // try to allocate the requested inode @@ -1892,7 +2137,7 @@ func (fs *FileSystem) allocateInode(parent uint32, requested int) (uint32, error // get first free inode, will return -1 if none free inodeInBG := bm.FirstFree(0) if inodeInBG != -1 { - inodeNumber = inodeInBG + int(fs.superblock.inodesPerGroup)*bg + inodeNumber = inodeInBG + int(fs.superblock.inodesPerGroup)*bg + 1 break } } @@ -1903,7 +2148,7 @@ func (fs *FileSystem) allocateInode(parent uint32, requested int) (uint32, error return 0, errors.New("no free inodes available") } - inodeInBG := inodeNumber - int(fs.superblock.inodesPerGroup)*bg + inodeInBG := inodeNumber - int(fs.superblock.inodesPerGroup)*bg - 1 isSet, err := bm.IsSet(inodeInBG) if err != nil { return 0, fmt.Errorf("could not check inode bitmap for requested inode %d: %w", requested, err) @@ -1921,22 +2166,8 @@ func (fs *FileSystem) allocateInode(parent uint32, requested int) (uint32, error } // reduce number of free inodes in that descriptor in the group descriptor table - gd.freeInodes-- - - // get the group descriptor as bytes - gdBytes := gd.toBytes(fs.superblock.gdtChecksumType(), fs.superblock.checksumSeed) - - // write the group descriptor bytes - // gdt starts in block 1 of any redundant copies, specifically in BG 0 - gdtBlock := 1 - blockByteLocation := gdtBlock * int(fs.superblock.blockSize) - gdOffset := int64(blockByteLocation) + int64(bg)*int64(fs.superblock.groupDescriptorSize) - wrote, err := writableFile.WriteAt(gdBytes, gdOffset) - if err != nil { - return 0, fmt.Errorf("unable to write group descriptor bytes for blockgroup %d: %v", bg, err) - } - if wrote != len(gdBytes) { - return 0, fmt.Errorf("wrote only %d bytes instead of expected %d for group descriptor of block group %d", wrote, len(gdBytes), bg) + if err := fs.incrGDFreeInodes(bg, -1); err != nil { + return 0, fmt.Errorf("could not decrement free inodes for block group %d: %w", bg, err) } // update inode count in superblock @@ -1953,6 +2184,8 @@ func (fs *FileSystem) allocateInode(parent uint32, requested int) (uint32, error // arguments are file size in bytes and existing extents // if previous is nil, then we are not (re)sizing an existing file but creating a new one // returns the extents to be used in order +// +//nolint:gocyclo // this is a long function, but it is not very complex, and breaking it up would make it less clear func (fs *FileSystem) allocateExtents(size uint64, previous *extents) (*extents, error) { // 1- calculate how many blocks are needed required := size / uint64(fs.superblock.blockSize) @@ -1978,6 +2211,46 @@ func (fs *FileSystem) allocateExtents(size uint64, previous *extents) (*extents, return nil, fmt.Errorf("only %d blocks free, requires additional %d", fs.superblock.freeBlocks, extraBlockCount) } + // fast path: find a single contiguous extent large enough + if extraBlockCount > 0 && extraBlockCount <= uint64(maxBlocksPerExtent) { + for i := int64(0); i < fs.blockGroups; i++ { + bs, err := fs.readBlockBitmap(int(i)) + if err != nil { + return nil, fmt.Errorf("could not read block bitmap for block group %d: %v", i, err) + } + blockList := bs.FreeList() + groupStart := uint64(fs.superblock.firstDataBlock) + uint64(i)*uint64(fs.superblock.blocksPerGroup) + for _, freeBlock := range blockList { + if uint64(freeBlock.Count) < extraBlockCount { + continue + } + start := uint64(freeBlock.Position) + extentToAdd := extent{ + startingBlock: start + groupStart, + count: uint16(extraBlockCount), + fileBlock: uint32(allocated), + } + for block := extentToAdd.startingBlock; block < extentToAdd.startingBlock+uint64(extentToAdd.count); block++ { + blockInGroup := block - groupStart + if err := bs.Set(int(blockInGroup)); err != nil { + return nil, fmt.Errorf("could not set block bitmap for block %d: %v", i, err) + } + } + if err := fs.writeBlockBitmap(bs, int(i)); err != nil { + return nil, fmt.Errorf("could not write block bitmap for block group %d: %v", i, err) + } + if err := fs.incrGDFreeBlocks(int(i), -int32(extentToAdd.count)); err != nil { + return nil, fmt.Errorf("could not update free block count in GDT for block group %d: %v", i, err) + } + fs.superblock.freeBlocks -= extraBlockCount + if err := fs.writeSuperblock(); err != nil { + return nil, fmt.Errorf("could not write superblock: %w", err) + } + return &extents{extentToAdd}, nil + } + } + } + // now we need to look for as many contiguous blocks as possible // first calculate the minimum number of extents needed @@ -2039,6 +2312,8 @@ func (fs *FileSystem) allocateExtents(size uint64, previous *extents) (*extents, if uint64(ext.count) >= extraBlockCount { extentToAdd = extent{startingBlock: ext.startingBlock, count: uint16(extraBlockCount)} } + extentToAdd.fileBlock = uint32(allocated) + allocated += uint64(extentToAdd.count) newExtents = append(newExtents, extentToAdd) allocatedBlocks += uint64(extentToAdd.count) extraBlockCount -= uint64(extentToAdd.count) @@ -2084,6 +2359,57 @@ func (fs *FileSystem) allocateExtents(size uint64, previous *extents) (*extents, return &exten, nil } +// deallocateExtents remove the given list of extents from marked as used. +// reverse of allocateExtents. +func (fs *FileSystem) deallocateExtents(toClear extents) error { + // we clear them all, so we keep a cache of the block bitmaps we have updated, so we do not have to read/write + // the same bitmap multiple times if there are multiple extents in the same block group + blockBitmaps := map[int]*bitmap.Bitmap{} + // we also keep track of how many blocks we have added back to each block group, so we can update the + // GDT entries at the end + gdBlockDelta := map[int]int32{} + for _, e := range toClear { + // get the block group for the blocks in the extents + for block := e.startingBlock; block < e.startingBlock+uint64(e.count); block++ { + bg := blockGroupForBlock(int(block), fs.superblock.blocksPerGroup) + // clear the block bitmap entries for the blocks in the extents + if _, ok := blockBitmaps[bg]; !ok { + bs, err := fs.readBlockBitmap(bg) + if err != nil { + return fmt.Errorf("could not read block bitmap for block group %d: %v", bg, err) + } + blockBitmaps[bg] = bs + } + bs := blockBitmaps[bg] + blockInGroup := block - (uint64(fs.superblock.firstDataBlock) + uint64(bg)*uint64(fs.superblock.blocksPerGroup)) + if err := bs.Clear(int(blockInGroup)); err != nil { + return fmt.Errorf("could not clear block bitmap for block %d in block group %d: %v", block, bg, err) + } + // increment the free block count in the GDT for the block group + if _, ok := gdBlockDelta[bg]; !ok { + gdBlockDelta[bg] = 0 + } + gdBlockDelta[bg]++ + // update the superblock free block count + fs.superblock.freeBlocks++ + } + } + for bg, bs := range blockBitmaps { + if err := fs.writeBlockBitmap(bs, bg); err != nil { + return fmt.Errorf("could not write block bitmap for block group %d: %v", bg, err) + } + } + for bg, delta := range gdBlockDelta { + if err := fs.incrGDFreeBlocks(bg, delta); err != nil { + return fmt.Errorf("could not update free block count in GDT for block group %d: %v", bg, err) + } + } + if err := fs.writeSuperblock(); err != nil { + return fmt.Errorf("could not write superblock: %w", err) + } + return nil +} + // readInodeBitmap read the inode bitmap off the disk. // This would be more efficient if we just read one group descriptor's bitmap // but for now we are about functionality, not efficiency, so it will read the whole thing. @@ -2181,6 +2507,30 @@ func (fs *FileSystem) writeBlockBitmap(bm *bitmap.Bitmap, group int) error { return nil } +// incrGDUsedDirs increment the number of used directories in the group descriptor for a given block group. +// If count is negative, decrement. +func (fs *FileSystem) incrGDUsedDirs(group int, count int32) error { + if group >= len(fs.groupDescriptors.descriptors) { + return fmt.Errorf("block group %d does not exist", group) + } + gd := &fs.groupDescriptors.descriptors[group] + switch { + case count > 0: + gd.usedDirectories += uint32(count) + case count < 0: + absCount := uint32(-count) + if gd.usedDirectories < absCount { + return fmt.Errorf("cannot decrement used directories by %d in block group %d since only %d are used", -count, group, gd.usedDirectories) + } + gd.usedDirectories -= absCount + default: + // no change + return nil + } + + return fs.writeGDT() +} + // incrGDFreeBlocks increment the number of free blocks in the group descriptor for a given block group. // If count is negative, decrement. func (fs *FileSystem) incrGDFreeBlocks(group int, count int32) error { @@ -2199,6 +2549,31 @@ func (fs *FileSystem) incrGDFreeBlocks(group int, count int32) error { gd.freeBlocks -= absCount default: // no change + return nil + } + + return fs.writeGDT() +} + +// incrGDFreeInodes increment the number of free inodes in the group descriptor for a given block group. +// If count is negative, decrement. +func (fs *FileSystem) incrGDFreeInodes(group int, count int32) error { + if group >= len(fs.groupDescriptors.descriptors) { + return fmt.Errorf("block group %d does not exist", group) + } + gd := &fs.groupDescriptors.descriptors[group] + switch { + case count > 0: + gd.freeInodes += uint32(count) + case count < 0: + absCount := uint32(-count) + if gd.freeInodes < absCount { + return fmt.Errorf("cannot decrement free inodes by %d in block group %d since only %d are free", -count, group, gd.freeInodes) + } + gd.freeInodes -= absCount + default: + // no change + return nil } return fs.writeGDT() diff --git a/filesystem/ext4/ext4_integration_test.go b/filesystem/ext4/ext4_integration_test.go new file mode 100644 index 00000000..045be738 --- /dev/null +++ b/filesystem/ext4/ext4_integration_test.go @@ -0,0 +1,263 @@ +package ext4_test + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io" + "io/fs" + "os" + "os/exec" + "path" + "path/filepath" + "sort" + "testing" + + "github.com/diskfs/go-diskfs/backend/file" + "github.com/diskfs/go-diskfs/filesystem/ext4" + diskfsync "github.com/diskfs/go-diskfs/sync" +) + +const imgFile = "testdata/dist/ext4.img" + +var excludedPaths = map[string]bool{ + "lost+found": true, + ".DS_Store": true, + "System Volume Information": true, +} + +func testCreateEmptyFile(t *testing.T, size int64) (string, *os.File) { + t.Helper() + dir := t.TempDir() + outfile := filepath.Join(dir, "ext4.img") + f, err := os.Create(outfile) + if err != nil { + t.Fatalf("Error creating empty image file: %v", err) + } + if err := f.Truncate(size); err != nil { + t.Fatalf("Error truncating image file: %v", err) + } + return outfile, f +} + +func TestCopyFileSystemIntegration(t *testing.T) { + srcInfo, err := os.Stat(imgFile) + if err != nil { + t.Fatalf("Error stating test image: %v", err) + } + srcSize := srcInfo.Size() + dstSize := srcSize + srcSize/5 + + srcFile, err := os.Open(imgFile) + if err != nil { + t.Fatalf("Error opening test image: %v", err) + } + defer srcFile.Close() + + srcBackend := file.New(srcFile, true) + srcFS, err := ext4.Read(srcBackend, srcSize, 0, 512) + if err != nil { + t.Fatalf("Error reading filesystem: %v", err) + } + + dstPath, dstFile := testCreateEmptyFile(t, dstSize) + defer dstFile.Close() + + dstBackend := file.New(dstFile, false) + dstFS, err := ext4.Create(dstBackend, dstSize, 0, 512, &ext4.Params{}) + if err != nil { + t.Fatalf("Error creating destination filesystem: %v", err) + } + if dstFS == nil { + t.Fatalf("Expected non-nil filesystem after creation") + } + + if err := diskfsync.CopyFileSystem(srcFS, dstFS); err != nil { + t.Fatalf("Error copying filesystem: %v", err) + } + + if err := dstFile.Sync(); err != nil { + t.Fatalf("Error syncing destination file: %v", err) + } + + srcVerifyFile, srcVerifyFS, err := readExt4FS(imgFile, srcSize) + if err != nil { + t.Fatalf("Error reopening source filesystem: %v", err) + } + defer srcVerifyFile.Close() + dstVerifyFile, dstVerifyFS, err := readExt4FS(dstPath, dstSize) + if err != nil { + t.Fatalf("Error reopening destination filesystem: %v", err) + } + defer dstVerifyFile.Close() + + cmd := exec.Command("e2fsck", "-f", "-n", "-vv", dstPath) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + t.Fatalf("e2fsck failed: %v,\nstdout:\n%s,\n\nstderr:\n%s", err, stdout.String(), stderr.String()) + } + + if err := compareFileSystems(srcVerifyFS, dstVerifyFS); err != nil { + t.Fatalf("Filesystem copy mismatch: %v", err) + } +} + +func readExt4FS(p string, size int64) (*os.File, fs.FS, error) { + f, err := os.Open(p) + if err != nil { + return nil, nil, err + } + b := file.New(f, true) + ext4FS, err := ext4.Read(b, size, 0, 512) + if err != nil { + _ = f.Close() + return nil, nil, err + } + return f, ext4FS, nil +} + +func compareFileSystems(src, dst fs.FS) error { + return compareDir(src, dst, ".") +} + +func compareDir(src, dst fs.FS, dir string) error { + srcEntries, err := fs.ReadDir(src, dir) + if err != nil { + return fmt.Errorf("read dir %s: %w", dir, err) + } + dstEntries, err := fs.ReadDir(dst, dir) + if err != nil { + return fmt.Errorf("read dir %s (dst): %w", dir, err) + } + + srcMap := make(map[string]fs.DirEntry, len(srcEntries)) + for _, entry := range srcEntries { + if excludedPaths[entry.Name()] { + continue + } + srcMap[entry.Name()] = entry + } + dstMap := make(map[string]fs.DirEntry, len(dstEntries)) + for _, entry := range dstEntries { + if excludedPaths[entry.Name()] { + continue + } + dstMap[entry.Name()] = entry + } + + if len(srcMap) != len(dstMap) { + return fmt.Errorf("entry count mismatch in %s: src=%d dst=%d", dir, len(srcMap), len(dstMap)) + } + + names := make([]string, 0, len(srcMap)) + for name := range srcMap { + names = append(names, name) + } + sort.Strings(names) + + for _, name := range names { + srcEntry, ok := srcMap[name] + if !ok { + return fmt.Errorf("missing source entry %s", name) + } + dstEntry, ok := dstMap[name] + if !ok { + return fmt.Errorf("missing destination entry %s in %s", name, dir) + } + + srcInfo, err := srcEntry.Info() + if err != nil { + return fmt.Errorf("stat %s: %w", name, err) + } + dstInfo, err := dstEntry.Info() + if err != nil { + return fmt.Errorf("stat %s (dst): %w", name, err) + } + + srcIsDir := srcEntry.IsDir() + dstIsDir := dstEntry.IsDir() + if srcIsDir != dstIsDir { + return fmt.Errorf("type mismatch for %s: dir=%v dst=%v", name, srcIsDir, dstIsDir) + } + + srcIsSymlink := srcInfo.Mode()&fs.ModeSymlink != 0 + dstIsSymlink := dstInfo.Mode()&fs.ModeSymlink != 0 + if srcIsSymlink != dstIsSymlink { + return fmt.Errorf("symlink mismatch for %s: src=%v dst=%v", name, srcIsSymlink, dstIsSymlink) + } + + fullPath := name + if dir != "." { + fullPath = path.Join(dir, name) + } + + switch { + case srcIsSymlink: + srcTarget, err := readlink(src, fullPath) + if err != nil { + return fmt.Errorf("readlink %s: %w", fullPath, err) + } + dstTarget, err := readlink(dst, fullPath) + if err != nil { + return fmt.Errorf("readlink %s (dst): %w", fullPath, err) + } + if srcTarget != dstTarget { + return fmt.Errorf("symlink target mismatch for %s: %q vs %q", fullPath, srcTarget, dstTarget) + } + case srcIsDir: + if err := compareDir(src, dst, fullPath); err != nil { + return err + } + default: + if srcInfo.Size() != dstInfo.Size() { + return fmt.Errorf("size mismatch for %s: src=%d dst=%d", fullPath, srcInfo.Size(), dstInfo.Size()) + } + match, err := compareFileContents(src, dst, fullPath) + if err != nil { + return err + } + if !match { + return fmt.Errorf("content mismatch for %s", fullPath) + } + } + } + + return nil +} + +func compareFileContents(src, dst fs.FS, p string) (bool, error) { + srcFile, err := src.Open(p) + if err != nil { + return false, fmt.Errorf("open %s: %w", p, err) + } + defer srcFile.Close() + dstFile, err := dst.Open(p) + if err != nil { + return false, fmt.Errorf("open %s (dst): %w", p, err) + } + defer dstFile.Close() + + srcHash := sha256.New() + dstHash := sha256.New() + buf := make([]byte, 32*1024) + if _, err := io.CopyBuffer(srcHash, srcFile, buf); err != nil { + return false, fmt.Errorf("hash %s: %w", p, err) + } + if _, err := io.CopyBuffer(dstHash, dstFile, buf); err != nil { + return false, fmt.Errorf("hash %s (dst): %w", p, err) + } + + return bytes.Equal(srcHash.Sum(nil), dstHash.Sum(nil)), nil +} + +func readlink(fsys fs.FS, p string) (string, error) { + type readlinker interface { + ReadLink(string) (string, error) + } + if rl, ok := fsys.(readlinker); ok { + return rl.ReadLink(p) + } + return "", fmt.Errorf("filesystem does not support readlink: %T", fsys) +} diff --git a/filesystem/ext4/extent.go b/filesystem/ext4/extent.go index 06c3bd25..dd038ce6 100644 --- a/filesystem/ext4/extent.go +++ b/filesystem/ext4/extent.go @@ -108,7 +108,8 @@ type extentChildPtr struct { // By definition, this is a leaf node, so depth=0 type extentLeafNode struct { extentNodeHeader - extents extents // the actual extents + extents extents // the actual extents + diskBlock uint64 // block number where this node is stored on disk (0 if root/in inode) } // findBlocks find the actual blocks for a range in the file. leaf nodes already have all of the data inside, @@ -193,7 +194,8 @@ func (e *extentLeafNode) getCount() uint32 { // By definition, this is an internal node, so depth>0 type extentInternalNode struct { extentNodeHeader - children []*extentChildPtr // the children + children []*extentChildPtr // the children + diskBlock uint64 // block number where this node is stored on disk (0 if root/in inode) } // findBlocks find the actual blocks for a range in the file. internal nodes need to read the filesystem to @@ -372,8 +374,9 @@ func parseExtents(b []byte, blocksize, start, count uint32) (extentBlockFinder, // if the existing tree is nil, create a new one. // For example, if the input is an extent tree - like the kind found in an inode - and you want to add more extents to it, // you add the provided extents, and it expands the tree, including creating new internal nodes and writing them to disk, as needed. +// Returns the updated tree, the number of metadata blocks allocated for extent tree nodes, and any error. -func extendExtentTree(existing extentBlockFinder, added *extents, fs *FileSystem, parent *extentInternalNode) (extentBlockFinder, error) { +func extendExtentTree(existing extentBlockFinder, added *extents, fs *FileSystem, parent *extentInternalNode) (extentBlockFinder, uint64, error) { // Check if existing is a leaf or internal node switch node := existing.(type) { case *extentLeafNode: @@ -382,9 +385,10 @@ func extendExtentTree(existing extentBlockFinder, added *extents, fs *FileSystem return extendInternalNode(node, added, fs, parent) case nil: // brand new extent tree. The root is in the inode, which has a max of 4 extents. - return createRootExtentTree(added, fs) + result, err := createRootExtentTree(added, fs) + return result, 0, err default: - return nil, fmt.Errorf("unsupported extentBlockFinder type") + return nil, 0, fmt.Errorf("unsupported extentBlockFinder type") } } @@ -407,7 +411,7 @@ func createRootExtentTree(added *extents, fs *FileSystem) (extentBlockFinder, er return nil, fmt.Errorf("cannot create root internal node") } -func extendLeafNode(node *extentLeafNode, added *extents, fs *FileSystem, parent *extentInternalNode) (extentBlockFinder, error) { +func extendLeafNode(node *extentLeafNode, added *extents, fs *FileSystem, parent *extentInternalNode) (extentBlockFinder, uint64, error) { // Check if the leaf node has enough space for the added extents if len(node.extents)+len(*added) <= int(node.max) { // Simply append the extents if there's enough space @@ -417,39 +421,62 @@ func extendLeafNode(node *extentLeafNode, added *extents, fs *FileSystem, parent // Write the updated node back to the disk err := writeNodeToDisk(node, fs, parent) if err != nil { - return nil, err + return nil, 0, err } - return node, nil - } - - // If not enough space, split the node - newNodes, err := splitLeafNode(node, added, fs, parent) - if err != nil { - return nil, err + return node, 0, nil } - // Check if the original node was the root + // Check if the original node was the root (parent == nil) if parent == nil { + // Calculate max entries for a non-root leaf node + maxEntriesNonRoot := (node.blockSize - 12) / 12 + totalExtents := len(node.extents) + len(*added) + + // If all extents fit in a single non-root leaf node, create one leaf + internal root + // This avoids unnecessarily splitting into two nodes + if uint32(totalExtents) <= maxEntriesNonRoot { + newLeaf, metaBlocks, err := promoteLeafToChild(node, added, fs) + if err != nil { + return nil, 0, err + } + newRoot := createInternalNode([]extentBlockFinder{newLeaf}, nil, fs) + return newRoot, metaBlocks, nil + } + + // Otherwise split the node + newNodes, metaBlocks, err := splitLeafNode(node, added, fs, parent) + if err != nil { + return nil, 0, err + } + // Create a new internal node to reference the split leaf nodes var newNodesAsBlockFinder []extentBlockFinder for _, n := range newNodes { newNodesAsBlockFinder = append(newNodesAsBlockFinder, n) } newRoot := createInternalNode(newNodesAsBlockFinder, nil, fs) - return newRoot, nil + return newRoot, metaBlocks, nil + } + + // If not enough space in a non-root node, split it + newNodes, splitMetaBlocks, err := splitLeafNode(node, added, fs, parent) + if err != nil { + return nil, 0, err } // If the original node was not the root, handle the parent internal node parentNode, err := getParentNode(node, fs) if err != nil { - return nil, err + return nil, 0, err } - return extendInternalNode(parentNode, added, fs, parent) + _ = newNodes // nodes are already written to disk in splitLeafNode + result, parentMetaBlocks, err := extendInternalNode(parentNode, added, fs, parent) + return result, splitMetaBlocks + parentMetaBlocks, err } -func splitLeafNode(node *extentLeafNode, added *extents, fs *FileSystem, parent *extentInternalNode) ([]*extentLeafNode, error) { +func splitLeafNode(node *extentLeafNode, added *extents, fs *FileSystem, parent *extentInternalNode) ([]*extentLeafNode, uint64, error) { // Combine existing and new extents allExtents := node.extents allExtents = append(allExtents, *added...) @@ -461,12 +488,16 @@ func splitLeafNode(node *extentLeafNode, added *extents, fs *FileSystem, parent // Calculate the midpoint to split the extents mid := len(allExtents) / 2 + // Calculate max entries for non-root nodes (based on block size) + // Each entry is 12 bytes, header is 12 bytes + maxEntries := (node.blockSize - 12) / 12 + // Create the first new leaf node firstLeaf := &extentLeafNode{ extentNodeHeader: extentNodeHeader{ depth: 0, entries: uint16(mid), - max: node.max, + max: uint16(maxEntries), blockSize: node.blockSize, }, extents: allExtents[:mid], @@ -477,45 +508,132 @@ func splitLeafNode(node *extentLeafNode, added *extents, fs *FileSystem, parent extentNodeHeader: extentNodeHeader{ depth: 0, entries: uint16(len(allExtents) - mid), - max: node.max, + max: uint16(maxEntries), blockSize: node.blockSize, }, extents: allExtents[mid:], } - // Write new leaf nodes to the disk - err := writeNodeToDisk(firstLeaf, fs, parent) - if err != nil { - return nil, err + var metaBlocks uint64 + + // When splitting the root (parent == nil), we need to allocate new disk blocks + // for the child nodes since they will no longer live in the inode + if parent == nil { + // Allocate blocks for both new leaf nodes + blockAlloc, err := fs.allocateExtents(uint64(fs.superblock.blockSize)*2, nil) + if err != nil { + return nil, 0, fmt.Errorf("could not allocate blocks for split leaf nodes: %w", err) + } + // Get the starting block from the allocated extent + allocatedExtents := *blockAlloc + if len(allocatedExtents) == 0 || allocatedExtents[0].count < 2 { + return nil, 0, fmt.Errorf("could not allocate enough blocks for split leaf nodes") + } + firstLeaf.diskBlock = allocatedExtents[0].startingBlock + secondLeaf.diskBlock = allocatedExtents[0].startingBlock + 1 + metaBlocks = 2 + + // Write the leaf nodes to their allocated blocks + if err := writeNodeToBlock(firstLeaf, fs, firstLeaf.diskBlock); err != nil { + return nil, 0, err + } + if err := writeNodeToBlock(secondLeaf, fs, secondLeaf.diskBlock); err != nil { + return nil, 0, err + } + } else { + // Write new leaf nodes to the disk using parent reference + err := writeNodeToDisk(firstLeaf, fs, parent) + if err != nil { + return nil, 0, err + } + err = writeNodeToDisk(secondLeaf, fs, parent) + if err != nil { + return nil, 0, err + } + } + + return []*extentLeafNode{firstLeaf, secondLeaf}, metaBlocks, nil +} + +// promoteLeafToChild takes a root leaf node and its new extents, combines them into a single +// non-root leaf node that will live on disk. This is used when all extents fit in one non-root leaf. +func promoteLeafToChild(node *extentLeafNode, added *extents, fs *FileSystem) (*extentLeafNode, uint64, error) { + // Combine existing and new extents + allExtents := node.extents + allExtents = append(allExtents, *added...) + // Sort extents by fileBlock to maintain order + sort.Slice(allExtents, func(i, j int) bool { + return allExtents[i].fileBlock < allExtents[j].fileBlock + }) + + // Calculate max entries for non-root nodes (based on block size) + maxEntries := (node.blockSize - 12) / 12 + + // Create the new leaf node + newLeaf := &extentLeafNode{ + extentNodeHeader: extentNodeHeader{ + depth: 0, + entries: uint16(len(allExtents)), + max: uint16(maxEntries), + blockSize: node.blockSize, + }, + extents: allExtents, } - err = writeNodeToDisk(secondLeaf, fs, parent) + + // Allocate a block for the new leaf node + blockAlloc, err := fs.allocateExtents(uint64(fs.superblock.blockSize), nil) if err != nil { - return nil, err + return nil, 0, fmt.Errorf("could not allocate block for leaf node: %w", err) + } + allocatedExtents := *blockAlloc + if len(allocatedExtents) == 0 || allocatedExtents[0].count < 1 { + return nil, 0, fmt.Errorf("could not allocate block for leaf node") } + newLeaf.diskBlock = allocatedExtents[0].startingBlock - return []*extentLeafNode{firstLeaf, secondLeaf}, nil + // Write the leaf node to its allocated block + if err := writeNodeToBlock(newLeaf, fs, newLeaf.diskBlock); err != nil { + return nil, 0, err + } + + return newLeaf, 1, nil } func createInternalNode(nodes []extentBlockFinder, parent *extentInternalNode, fs *FileSystem) *extentInternalNode { + // Calculate max entries for internal nodes (based on block size) + // Each entry is 12 bytes, header is 12 bytes + // For root node in inode, max is 4 + maxEntries := uint16(4) + if parent != nil { + maxEntries = uint16((nodes[0].getBlockSize() - 12) / 12) + } + internalNode := &extentInternalNode{ extentNodeHeader: extentNodeHeader{ depth: nodes[0].getDepth() + 1, // Depth is 1 more than the children entries: uint16(len(nodes)), - max: nodes[0].getMax(), // Assuming uniform max for all nodes + max: maxEntries, blockSize: nodes[0].getBlockSize(), }, children: make([]*extentChildPtr, len(nodes)), } for i, node := range nodes { + var diskBlock uint64 + if parent == nil { + // When creating a new root, get disk block from the node itself + diskBlock = getDiskBlockFromNode(node) + } else { + diskBlock = getBlockNumberFromNode(node, parent) + } internalNode.children[i] = &extentChildPtr{ fileBlock: node.getFileBlock(), count: node.getCount(), - diskBlock: getBlockNumberFromNode(node, parent), + diskBlock: diskBlock, } } - // Write the new internal node to the disk + // Write the new internal node to the disk (root nodes live in inode, so parent==nil means no write) err := writeNodeToDisk(internalNode, fs, parent) if err != nil { return nil @@ -525,6 +643,9 @@ func createInternalNode(nodes []extentBlockFinder, parent *extentInternalNode, f } func getBlockNumberFromNode(node extentBlockFinder, parent *extentInternalNode) uint64 { + if parent == nil { + return 0 + } for _, childPtr := range parent.children { if childPtrMatchesNode(childPtr, node) { return childPtr.diskBlock @@ -533,6 +654,30 @@ func getBlockNumberFromNode(node extentBlockFinder, parent *extentInternalNode) return 0 // Return 0 or an appropriate error value if the block number is not found } +// getDiskBlockFromNode retrieves the disk block number stored in the node itself +func getDiskBlockFromNode(node extentBlockFinder) uint64 { + switch n := node.(type) { + case *extentLeafNode: + return n.diskBlock + case *extentInternalNode: + return n.diskBlock + default: + return 0 + } +} + +// writeNodeToBlock writes an extent node to a specific disk block +func writeNodeToBlock(node extentBlockFinder, fs *FileSystem, blockNumber uint64) error { + writableFile, err := fs.backend.Writable() + if err != nil { + return err + } + + data := node.toBytes() + _, err = writableFile.WriteAt(data, int64(blockNumber)*int64(fs.superblock.blockSize)) + return err +} + // Helper function to match a child pointer to a node func childPtrMatchesNode(childPtr *extentChildPtr, node extentBlockFinder) bool { switch n := node.(type) { @@ -547,7 +692,7 @@ func childPtrMatchesNode(childPtr *extentChildPtr, node extentBlockFinder) bool } } -func extendInternalNode(node *extentInternalNode, added *extents, fs *FileSystem, parent *extentInternalNode) (extentBlockFinder, error) { +func extendInternalNode(node *extentInternalNode, added *extents, fs *FileSystem, parent *extentInternalNode) (extentBlockFinder, uint64, error) { // Find the appropriate child node to extend childIndex := findChildNode(node, added) childPtr := node.children[childIndex] @@ -555,13 +700,13 @@ func extendInternalNode(node *extentInternalNode, added *extents, fs *FileSystem // Load the actual child node from the disk childNode, err := loadChildNode(childPtr, fs) if err != nil { - return nil, err + return nil, 0, err } // Recursively extend the child node - updatedChild, err := extendExtentTree(childNode, added, fs, node) + updatedChild, metaBlocks, err := extendExtentTree(childNode, added, fs, node) if err != nil { - return nil, err + return nil, 0, err } // Update the current internal node to reference the updated child @@ -579,7 +724,7 @@ func extendInternalNode(node *extentInternalNode, added *extents, fs *FileSystem diskBlock: getBlockNumberFromNode(updatedChild, node), } default: - return nil, fmt.Errorf("unsupported updatedChild type") + return nil, 0, fmt.Errorf("unsupported updatedChild type") } // Check if the internal node is at capacity @@ -587,7 +732,7 @@ func extendInternalNode(node *extentInternalNode, added *extents, fs *FileSystem // Split the internal node if it's at capacity newInternalNodes, err := splitInternalNode(node, node.children[childIndex], fs, parent) if err != nil { - return nil, err + return nil, 0, err } // Check if the original node was the root @@ -598,7 +743,7 @@ func extendInternalNode(node *extentInternalNode, added *extents, fs *FileSystem newNodesAsBlockFinder = append(newNodesAsBlockFinder, n) } newRoot := createInternalNode(newNodesAsBlockFinder, nil, fs) - return newRoot, nil + return newRoot, metaBlocks, nil } // If the original node was not the root, handle the parent internal node @@ -608,10 +753,10 @@ func extendInternalNode(node *extentInternalNode, added *extents, fs *FileSystem // Write the updated node back to the disk err = writeNodeToDisk(node, fs, parent) if err != nil { - return nil, err + return nil, 0, err } - return node, nil + return node, metaBlocks, nil } // Helper function to get the parent node of a given internal node diff --git a/filesystem/ext4/file.go b/filesystem/ext4/file.go index 85ec6f1f..ebec494b 100644 --- a/filesystem/ext4/file.go +++ b/filesystem/ext4/file.go @@ -97,7 +97,6 @@ func (fl *File) Write(b []byte) (int, error) { var ( fileSize = int64(fl.size) originalFileSize = int64(fl.size) - blockCount = fl.blocks originalBlockCount = fl.blocks blocksize = uint64(fl.filesystem.superblock.blockSize) ) @@ -125,14 +124,13 @@ func (fl *File) Write(b []byte) (int, error) { if fl.size%blocksize > 0 { newBlockCount++ } - if newBlockCount > blockCount { - blocksNeeded := newBlockCount - blockCount - bytesNeeded := blocksNeeded * blocksize - newExtents, err := fl.filesystem.allocateExtents(bytesNeeded, &fl.extents) + allocatedBlocks := fl.extents.blockCount() + if newBlockCount > allocatedBlocks { + newExtents, err := fl.filesystem.allocateExtents(fl.size, &fl.extents) if err != nil { return 0, fmt.Errorf("could not allocate disk space for file %w", err) } - extentTreeParsed, err := extendExtentTree(fl.inode.extents, newExtents, fl.filesystem, nil) + extentTreeParsed, metaBlocks, err := extendExtentTree(fl.inode.extents, newExtents, fl.filesystem, nil) if err != nil { return 0, fmt.Errorf("could not convert extents into tree: %w", err) } @@ -142,7 +140,11 @@ func (fl *File) Write(b []byte) (int, error) { return 0, fmt.Errorf("could not read updated extents: %w", err) } fl.extents = updatedExtents - fl.blocks = newBlockCount + if fl.filesystemBlocks { + fl.blocks = newBlockCount + metaBlocks + } else { + fl.blocks = (newBlockCount + metaBlocks) * blocksize / 512 + } } if originalFileSize != int64(fl.size) || originalBlockCount != fl.blocks { @@ -194,11 +196,7 @@ func (fl *File) Write(b []byte) (int, error) { } } - if fl.offset >= fileSize { - err = io.EOF - } - - return int(writtenBytes), err + return int(writtenBytes), nil } // Seek set the offset to a particular point in the file diff --git a/filesystem/ext4/inode.go b/filesystem/ext4/inode.go index 29b59a4c..f868178d 100644 --- a/filesystem/ext4/inode.go +++ b/filesystem/ext4/inode.go @@ -157,6 +157,8 @@ func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { if len(b) < int(minInodeSize) { return nil, fmt.Errorf("inode data too short: %d bytes, must be min %d bytes", len(b), minInodeSize) } + // only work with the amount of data that is the size of the inode, even if more was passed in + b = b[:sb.inodeSize] // checksum before using the data checksumBytes := make([]byte, 4) @@ -175,7 +177,6 @@ func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { fileSize := make([]byte, 8) group := make([]byte, 4) version := make([]byte, 8) - extendedAttributeBlock := make([]byte, 8) mode := binary.LittleEndian.Uint16(b[0x0:0x2]) @@ -187,8 +188,12 @@ func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { copy(fileSize[4:8], b[0x6c:0x70]) copy(version[0:4], b[0x24:0x28]) copy(version[4:8], b[0x98:0x9c]) - copy(extendedAttributeBlock[0:4], b[0x88:0x8c]) - copy(extendedAttributeBlock[4:6], b[0x76:0x78]) + + // i_generation (nfs file version) + iGeneration := binary.LittleEndian.Uint32(b[0x64:0x68]) + fileACLLo := binary.LittleEndian.Uint32(b[0x68:0x6c]) + fileACLHi := uint32(binary.LittleEndian.Uint16(b[0x76:0x78])) + extendedAttributeBlock := (uint64(fileACLHi) << 32) | uint64(fileACLLo) // get the times // the structure normally is 0:4 (32 bits) is seconds since the epoch @@ -295,7 +300,7 @@ func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { blocks: blocks, filesystemBlocks: filesystemBlocks, flags: &flags, - nfsFileVersion: binary.LittleEndian.Uint32(b[0x64:0x68]), + nfsFileVersion: iGeneration, version: binary.LittleEndian.Uint64(version), inodeSize: binary.LittleEndian.Uint16(b[0x80:0x82]) + minInodeSize, deletionTime: binary.LittleEndian.Uint32(b[0x14:0x18]), @@ -303,7 +308,7 @@ func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { changeTime: time.Unix(ctimeSec, ctimeNano), modifyTime: time.Unix(mtimeSec, mtimeNano), createTime: time.Unix(crtimeSec, crtimeNano), - extendedAttributeBlock: binary.LittleEndian.Uint64(extendedAttributeBlock), + extendedAttributeBlock: extendedAttributeBlock, project: binary.LittleEndian.Uint32(b[0x9c:0x100]), extents: allExtents, blockPointers: blockPointers, @@ -348,13 +353,21 @@ func (i *inode) toBytes(sb *superblock) []byte { // ext4 timestamps are 32 bits of seconds, plus an extra 32-bit field // containing 30 bits of nanoseconds and 2 bits of extended seconds. // See https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Inode_Timestamps + // + // The encoding formula from Linux kernel is: + // extra_epoch = ((sec - (int32)sec) >> 32) & 0x3 + // This correctly handles the signed 32-bit wraparound for dates outside 1970-2038. encodeAndWriteTimestamp := func(t time.Time, target []byte) { - seconds := t.Unix() - nanos := uint32(t.Nanosecond()) - high := uint32((seconds-int64(int32(seconds)))>>32) & 0x3 - extra := (nanos << 2) | high - - binary.LittleEndian.PutUint32(target[0:4], uint32(seconds)) + sec := t.Unix() + nsec := uint32(t.Nanosecond()) + // Calculate epoch bits using the kernel's formula: + // The difference between the full timestamp and its signed 32-bit truncation + // gives us the correct epoch value. + low32 := int32(sec) // signed truncation to 32 bits + epoch := uint32(((sec - int64(low32)) >> 32) & 0x3) // epoch bits + extra := (nsec << 2) | epoch + + binary.LittleEndian.PutUint32(target[0:4], uint32(low32)) binary.LittleEndian.PutUint32(target[4:8], extra) } @@ -379,19 +392,23 @@ func (i *inode) toBytes(sb *superblock) []byte { copy(b[0x1c:0x20], blocks[0:4]) binary.LittleEndian.PutUint32(b[0x20:0x24], i.flags.toInt()) copy(b[0x24:0x28], version[0:4]) - if i.flags != nil && i.flags.usesExtents { + switch { + case i.fileType == fileTypeSymbolicLink && i.size < 60: + copy(b[0x28:0x28+int(i.size)], i.linkTarget) + case i.flags != nil && i.flags.usesExtents: copy(b[0x28:0x64], i.extents.toBytes()) - } else { + default: for idx, ptr := range i.blockPointers { base := 0x28 + idx*4 binary.LittleEndian.PutUint32(b[base:base+4], ptr) } } + binary.LittleEndian.PutUint32(b[0x64:0x68], i.nfsFileVersion) copy(b[0x68:0x6c], extendedAttributeBlock[0:4]) copy(b[0x6c:0x70], fileSize[4:8]) // b[0x70:0x74] is obsolete - copy(b[0x74:0x76], blocks[4:8]) + copy(b[0x74:0x76], blocks[4:6]) copy(b[0x76:0x78], extendedAttributeBlock[4:6]) copy(b[0x78:0x7a], owner[2:4]) copy(b[0x7a:0x7c], group[2:4]) diff --git a/go.mod b/go.mod index 7a21f4af..92f8c764 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/diskfs/go-diskfs -go 1.24.0 +go 1.25.7 require ( github.com/anchore/go-lzo v0.1.0 diff --git a/go.sum b/go.sum index 1acf1182..089057e4 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,6 @@ github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= -github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= github.com/pkg/xattr v0.4.12 h1:rRTkSyFNTRElv6pkA3zpjHpQ90p/OdHQC1GmGh1aTjM= github.com/pkg/xattr v0.4.12/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -34,8 +32,6 @@ github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0o golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/sync/copy.go b/sync/copy.go index 5a04e085..e28886e4 100644 --- a/sync/copy.go +++ b/sync/copy.go @@ -7,18 +7,22 @@ import ( "io/fs" "log" "os" + "path" "github.com/diskfs/go-diskfs/disk" "github.com/diskfs/go-diskfs/filesystem" "github.com/diskfs/go-diskfs/partition/part" ) +// excludedPaths these are excluded from any copy var excludedPaths = map[string]bool{ "lost+found": true, ".DS_Store": true, "System Volume Information": true, } +const maxCopyAllSize = 64 * 1024 * 1024 + type copyData struct { count int64 err error @@ -26,64 +30,117 @@ type copyData struct { // CopyFileSystem copies files from a source fs.FS to a destination filesystem.FileSystem, preserving structure and contents. func CopyFileSystem(src fs.FS, dst filesystem.FileSystem) error { - return fs.WalkDir(src, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } + return copyDir(src, dst, ".") +} + +func copyDir(src fs.FS, dst filesystem.FileSystem, dir string) error { + entries, err := fs.ReadDir(src, dir) + if err != nil { + return fmt.Errorf("read dir %s: %w", dir, err) + } + + for _, entry := range entries { + name := entry.Name() // filter out special directories/files - if excludedPaths[d.Name()] { - if d.IsDir() { - return fs.SkipDir + if excludedPaths[name] { + if entry.IsDir() { + continue } - return nil + continue } - if path == "." || path == "/" || path == "\\" { - return nil + + p := name + if dir != "." { + p = path.Join(dir, name) } - info, err := d.Info() + info, err := entry.Info() if err != nil { - return err + return fmt.Errorf("stat %s: %w", p, err) } // symlinks, when they exist if info.Mode()&os.ModeSymlink != 0 { // Check if your destination interface supports symlinks // Most custom 'filesystem.FileSystem' interfaces might not. - return handleSymlink(src, dst, path) + if err := handleSymlink(src, dst, p); err != nil { + return fmt.Errorf("copy symlink %s: %w", p, err) + } + continue } - if d.IsDir() { - if path == "." { - return nil + if entry.IsDir() { + if err := dst.Mkdir(p); err != nil { + return fmt.Errorf("create dir %s: %w", p, err) + } + if err := copyDir(src, dst, p); err != nil { + return fmt.Errorf("copy dir %s: %w", p, err) } - return dst.Mkdir(path) + continue } if !info.Mode().IsRegular() { // FAT32 / ISO / SquashFS should not have others - return nil + continue } - return copyOneFile(src, dst, path, info) - }) + if err := copyOneFile(src, dst, p, info); err != nil { + return fmt.Errorf("copy file %s: %w", p, err) + } + } + + return nil } -func copyOneFile(src fs.FS, dst filesystem.FileSystem, path string, info fs.FileInfo) error { - in, err := src.Open(path) +func copyOneFile(src fs.FS, dst filesystem.FileSystem, p string, info fs.FileInfo) error { + in, err := src.Open(p) if err != nil { return err } defer func() { _ = in.Close() }() - out, err := dst.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_RDWR) + out, err := dst.OpenFile(p, os.O_CREATE|os.O_TRUNC|os.O_RDWR) if err != nil { return err } defer func() { _ = out.Close() }() - if _, err := io.Copy(out, in); err != nil { - return err + if info.Size() <= maxCopyAllSize { + data, err := io.ReadAll(in) + if err != nil { + return err + } + n, err := out.Write(data) + if err != nil { + return err + } + if n != len(data) { + return io.ErrShortWrite + } + } else { + buf := make([]byte, 32*1024) + for { + n, rerr := in.Read(buf) + if n > 0 { + written := 0 + for written < n { + w, werr := out.Write(buf[written:n]) + if werr != nil { + return werr + } + if w == 0 { + return io.ErrShortWrite + } + written += w + } + } + if rerr == io.EOF { + break + } + if rerr != nil { + return rerr + } + } } // Restore timestamps *after* data is written (tar semantics) @@ -91,27 +148,31 @@ func copyOneFile(src fs.FS, dst filesystem.FileSystem, path string, info fs.File if atime.IsZero() { atime = info.ModTime() // fallback } - return dst.Chtimes( - path, + if err := dst.Chtimes( + p, info.ModTime(), // creation time fallback if not available atime, // access time: optional / policy choice info.ModTime(), - ) + ); err != nil { + // Best-effort: copying content should still succeed even if timestamps cannot be set. + return nil + } + return nil } // handleSymlink handles copying a symlink from src to dst. It reads the link target -// -//nolint:revive,unparam // keeping args for clarity of intent. -func handleSymlink(src fs.FS, dst filesystem.FileSystem, path string) error { - // Note: src must support ReadLink. If src is an os.DirFS, - // you might need a type assertion or use os.Readlink directly. - linkTarget, err := os.Readlink(path) - if err != nil { - return nil // Or handle error +func handleSymlink(src fs.FS, dst filesystem.FileSystem, p string) error { + type readlinker interface { + ReadLink(string) (string, error) } - - // This assumes your 'dst' interface has a Symlink method - return dst.Symlink(linkTarget, path) + if rl, ok := src.(readlinker); ok { + linkTarget, err := rl.ReadLink(p) + if err != nil { + return err + } + return dst.Symlink(linkTarget, p) + } + return fmt.Errorf("source filesystem does not support reading symlinks for %s", p) } // CopyPartitionRaw copies raw data from one partition to another and verifies the copy. diff --git a/sync/copy_test.go b/sync/copy_test.go index 4421f6c7..6e93cd86 100644 --- a/sync/copy_test.go +++ b/sync/copy_test.go @@ -197,8 +197,9 @@ func TestCopyFileSystem_SkipNonRegular(t *testing.T) { "sl": {Data: []byte(""), Mode: fs.ModeSymlink}, } dst := &fakeFS{} - if err := CopyFileSystem(src, dst); err != nil { - t.Fatalf("CopyFileSystem failed: %v", err) + err := CopyFileSystem(src, dst) + if err != nil { + t.Fatalf("CopyFileSystem have unexpected error for a source filesystem without links: %v", err) } if _, ok := dst.files["sl"]; ok { t.Errorf("expected non-regular file to be skipped, but copied")