diff --git a/filesystem/ext4/ext4.go b/filesystem/ext4/ext4.go index 3f3eef8..c34d332 100644 --- a/filesystem/ext4/ext4.go +++ b/filesystem/ext4/ext4.go @@ -65,11 +65,24 @@ const ( defaultLogGroupsPerFlex int = 3 // fixed inodes - rootInode uint32 = 2 - userQuotaInode uint32 = 3 - groupQuotaInode uint32 = 4 - journalInode uint32 = 8 - lostFoundInode = 11 // traditional + rootInode uint32 = 2 + userQuotaInode uint32 = 3 + groupQuotaInode uint32 = 4 + bootLoaderIndoe uint32 = 5 + undeleteDirectoryInode uint32 = 6 + groupDescriptorsInode uint32 = 7 + journalInode uint32 = 8 + excludeInode uint32 = 9 + replicaInode uint32 = 10 + lostFoundInode = 11 // traditional + + // journal info + journalMaxSize int64 = 128 * MB + journalMinSize int64 = 4 * MB + + // reserved GDT info + gdtMaxReservedBlocks uint64 = 256 + gdtDefaultMaxGrowthFactor uint64 = 1024 ) type Params struct { @@ -92,13 +105,14 @@ type Params struct { // FileSystem implememnts the FileSystem interface type FileSystem struct { - bootSector []byte - superblock *superblock - groupDescriptors *groupDescriptors - blockGroups int64 - size int64 - start int64 - backend backend.Storage + bootSector []byte + superblock *superblock + groupDescriptors *groupDescriptors + blockGroups int64 + size int64 + start int64 + backend backend.Storage + backupSuperblocks []int64 } // Equal compare if two filesystems are equal @@ -137,6 +151,12 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS if p == nil { p = &Params{} } + fflags := defaultFeatureFlags + for _, flagopt := range p.Features { + flagopt(&fflags) + } + + mflags := defaultMiscFlags // sectorsize must be <=0 or exactly SectorSize512 or error // because of this, we know we can scale it down to a uint32, since it only can be 512 bytes @@ -204,12 +224,20 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS } // how many block groups do we have? 
- blockGroups := numblocks / int64(blocksPerGroup) + blockGroups := (numblocks + int64(blocksPerGroup) - 1) / int64(blocksPerGroup) // track how many free blocks we have freeBlocks := numblocks - clusterSize := p.ClusterSize + // cluster semantics + var clusterSize int64 + var clustersPerGroup uint32 + if fflags.bigalloc { + return nil, fmt.Errorf("bigalloc not yet supported") + } + // non-bigalloc: cluster == block + clusterSize = int64(blocksize) + clustersPerGroup = blocksPerGroup // use our inode ratio to determine how many inodes we should have inodeRatio := p.InodeRatio @@ -236,32 +264,15 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS return nil, fmt.Errorf("requested %d inodes, greater than max %d", inodeCount, max32Num) } - inodesPerGroup := int64(inodeCount) / blockGroups + raw := (int64(inodeCount) + blockGroups - 1) / blockGroups // round UP - // track how many free inodes we have - freeInodes := inodeCount + // ext requires multiple of 8 + inodesPerGroup := (raw + 7) &^ 7 - // which blocks have superblock and GDT? - var ( - backupSuperblocks []int64 - backupSuperblockGroupsSparse [2]uint32 - ) - // 0 - primary - // ?? 
- backups - switch p.SparseSuperVersion { - case 2: - // backups in first and last block group - backupSuperblockGroupsSparse = [2]uint32{0, uint32(blockGroups) - 1} - backupSuperblocks = []int64{0, 1, blockGroups - 1} - default: - backupSuperblockGroups := calculateBackupSuperblockGroups(blockGroups) - backupSuperblocks = []int64{0} - for _, bg := range backupSuperblockGroups { - backupSuperblocks = append(backupSuperblocks, bg*int64(blocksPerGroup)) - } - } + inodeCount = uint32(inodesPerGroup * blockGroups) - freeBlocks -= int64(len(backupSuperblocks)) + // track how many free inodes we have + freeInodes := inodeCount var firstDataBlock uint32 if blocksize == 1024 { @@ -340,8 +351,8 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS } */ - // allocate root directory, single inode - freeInodes-- + // allocate reserved inodes, including root (inodes 1-10) + freeInodes -= firstNonReservedInode - 1 // how many reserved blocks? reservedBlocksPercent := p.ReservedBlocksPercent @@ -349,15 +360,6 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS reservedBlocksPercent = DefaultReservedBlocksPercent } - // are checksums enabled? 
- gdtChecksumType := gdtChecksumNone - if p.Checksum { - gdtChecksumType = gdtChecksumMetadata - } - - // we do not yet support bigalloc - var clustersPerGroup = blocksPerGroup - // inodesPerGroup: once we know how many inodes per group, and how many groups // we will have the total inode count @@ -366,13 +368,6 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS volumeName = DefaultVolumeName } - fflags := defaultFeatureFlags - for _, flagopt := range p.Features { - flagopt(&fflags) - } - - mflags := defaultMiscFlags - // generate hash seed hashSeed, _ := uuid.NewRandom() hashSeedBytes := hashSeed[:] @@ -384,11 +379,17 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS binary.LittleEndian.Uint32(hashSeedBytes[12:16]), ) - // create a UUID for the journal - journalSuperblockUUID, _ := uuid.NewRandom() + // create a UUID for the journal - only for external journals + // For internal journals, this should be nil/zero + var journalSuperblockUUID uuid.UUID + var journalSuperblockUUIDPtr *uuid.UUID + if fflags.separateJournalDevice { + journalSuperblockUUID, _ = uuid.NewRandom() + journalSuperblockUUIDPtr = &journalSuperblockUUID + } // group descriptor size could be 32 or 64, depending on option - var gdSize uint16 + gdSize := groupDescriptorSize if fflags.fs64Bit { gdSize = groupDescriptorSize64Bit } @@ -400,17 +401,11 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS // calculate the maximum number of block groups // maxBlockGroups = (maxFSSize) / (blocksPerGroup * blocksize) - var ( - maxBlockGroups uint64 - ) - if fflags.fs64Bit { - maxBlockGroups = maxFilesystemSize64Bit / (uint64(blocksPerGroup) * uint64(blocksize)) - } else { - maxBlockGroups = maxFilesystemSize32Bit / (uint64(blocksPerGroup) * uint64(blocksize)) - } - reservedGDTBlocks := maxBlockGroups * 32 / maxBlockGroups - if reservedGDTBlocks > math.MaxUint16 { - return nil, fmt.Errorf("too many reserved 
blocks calculated for group descriptor table") + // TODO: Properly support resize_inode; for now avoid reserved GDT blocks unless explicitly enabled. + var reservedGDTBlocks uint64 + if fflags.reservedGDTBlocksForExpansion { + maxGrowthFilesystemSizeBytes := uint64(size) * gdtDefaultMaxGrowthFactor + reservedGDTBlocks = min(maxGrowthFilesystemSizeBytes/uint64(blocksize), gdtMaxReservedBlocks) } var ( @@ -454,6 +449,28 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS } } + // which blocks have superblock and GDT? + var ( + backupSuperblocks []int64 + backupSuperblockGroupsSparse [2]uint32 + ) + // 0 - primary + // ?? - backups + switch p.SparseSuperVersion { + case 2: + // backups in first and last block group + backupSuperblockGroupsSparse = [2]uint32{0, uint32(blockGroups) - 1} + backupSuperblocks = []int64{0, 1, blockGroups - 1} + default: + backupSuperblockGroups := calculateBackupSuperblockGroups(blockGroups) + backupSuperblocks = []int64{0} + for _, bg := range backupSuperblockGroups { + backupSuperblocks = append(backupSuperblocks, bg*int64(blocksPerGroup)) + } + } + + freeBlocks -= int64(len(backupSuperblocks)) + // create the superblock - MUST ADD IN OPTIONS now, epoch := time.Now(), time.Unix(0, 0) sb := superblock{ @@ -471,7 +488,7 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS mountTime: now, writeTime: now, mountCount: 0, - mountsToFsck: 0, + mountsToFsck: 100, // seems like a reasonable starting point filesystemState: fsStateCleanlyUnmounted, errorBehaviour: errorsContinue, minorRevision: 0, @@ -492,7 +509,7 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS preallocationBlocks: 0, // not used in Linux e2fsprogs preallocationDirectoryBlocks: 0, // not used in Linux e2fsprogs reservedGDTBlocks: uint16(reservedGDTBlocks), - journalSuperblockUUID: &journalSuperblockUUID, + journalSuperblockUUID: journalSuperblockUUIDPtr, journalInode: journalInode, 
journalDeviceNumber: journalDeviceNumber, orphanedInodesStart: 0, @@ -536,77 +553,96 @@ func Create(b backend.Storage, size, start, sectorsize int64, p *Params) (*FileS userQuotaInode: userQuotaInode, groupQuotaInode: groupQuotaInode, projectQuotaInode: projectQuotaInode, - logGroupsPerFlex: uint64(logGroupsPerFlex), + logGroupsPerFlex: uint64(1 << logGroupsPerFlex), } + gdt := buildGroupDescriptorsFromSuperblock(&sb) + // Make SubStorage Backend + fsBackend := backend.Sub(b, start, size) + fs := &FileSystem{ + bootSector: []byte{}, + superblock: &sb, + groupDescriptors: &gdt, + blockGroups: blockGroups, + size: size, + start: start, + backend: fsBackend, + backupSuperblocks: backupSuperblocks, + } + // allocate root in the first group descriptor bg0 := &gdt.descriptors[0] bg0.usedDirectories++ - bg0.freeInodes-- - g := gdt.toBytes(gdtChecksumType, sb.checksumSeed) + // Note: Root inode (2) is already part of reserved inodes (1-10), so no extra decrement - superblockBytes, err := sb.toBytes() - if err != nil { - return nil, fmt.Errorf("error converting Superblock to bytes: %v", err) - } + // reserved inodes need to be marked (inodes 1-10 are truly reserved, including root at 2) + reservedInodes := firstNonReservedInode - 1 // inodes 1-10 + bg0.freeInodes -= reservedInodes - // how big should the GDT be? 
- gdSize = groupDescriptorSize // size of a single group descriptor - if sb.features.fs64Bit { - gdSize = groupDescriptorSize64Bit + gdtByteCount := calculateGDTBytes(gdt, len(backupSuperblocks), sb.gdtChecksumType(), sb.checksumSeed) + // gdtByteCount is in bytes; convert to blocks for freeBlocks accounting + gdtBlocks := (gdtByteCount + uint64(sb.blockSize) - 1) / uint64(sb.blockSize) + if sb.freeBlocks >= gdtBlocks { + sb.freeBlocks -= gdtBlocks + } else { + sb.freeBlocks = 0 } - // now calculate how many there should be in total - groupCount := sb.blockGroupCount() - gdtSize := uint64(gdSize) * groupCount - // write the superblock and GDT to the various locations on disk - // Make SubStorage Backend - fsBackend := backend.Sub(b, start, size) + if err := fs.initGroupDescriptorTables(); err != nil { + return nil, fmt.Errorf("unable to initialize group descriptor tables: %w", err) + } - for _, bg := range backupSuperblocks { - block := bg * int64(blocksPerGroup) - blockStart := block * int64(blocksize) - // allow that the first one requires an offset - incr := int64(0) - if block == 0 { - incr = int64(SectorSize512) * 2 + // Sync the underlying file to ensure all writes are persisted + if osFile, err := fsBackend.Sys(); err == nil && osFile != nil { + if err := osFile.Sync(); err != nil { + return nil, fmt.Errorf("error syncing file: %v", err) } + } - writable, err := fsBackend.Writable() - if err != nil { - return nil, err - } + // write the superblock and GDT to the various locations on disk + if err := fs.writeSuperblock(); err != nil { + return nil, fmt.Errorf("error writing Superblock: %v", err) + } + if err := fs.writeGDT(); err != nil { + return nil, fmt.Errorf("error writing GDT: %v", err) + } - // write the superblock - count, err := writable.WriteAt(superblockBytes, incr+blockStart) - if err != nil { - return nil, fmt.Errorf("error writing Superblock for block %d to disk: %v", block, err) - } - if count != int(SuperblockSize) { - return nil, 
fmt.Errorf("wrote %d bytes of Superblock for block %d to disk instead of expected %d", count, block, SuperblockSize) + // create the journal inode if the has_journal feature is enabled + if sb.features.hasJournal && !sb.features.separateJournalDevice { + if err := fs.initJournal(); err != nil { + return nil, fmt.Errorf("could not initialize journal: %w", err) } + } - // write the GDT - count, err = writable.WriteAt(g, incr+blockStart+int64(SuperblockSize)) - if err != nil { - return nil, fmt.Errorf("error writing GDT for block %d to disk: %v", block, err) - } - if count != int(gdtSize) { - return nil, fmt.Errorf("wrote %d bytes of GDT for block %d to disk instead of expected %d", count, block, gdtSize) + // create resize inode only if the feature is enabled + if fs.superblock.features.reservedGDTBlocksForExpansion && fs.superblock.reservedGDTBlocks > 0 { + if err := fs.initResizeInode(); err != nil { + return nil, fmt.Errorf("could not initialize resize inode: %w", err) } } // create root directory + if err := fs.initFile( + rootInode, rootInode, fileTypeDirectory, + filePermissions{read: true, execute: true, write: true}, + filePermissions{read: true, execute: true}, + filePermissions{read: true, execute: true}, + 0, 0, + ); err != nil { + return nil, fmt.Errorf("could not initialize root directory: %w", err) + } + + // Recompute free blocks from group descriptors to keep superblock consistent. + var totalFreeBlocks uint64 + for _, gd := range fs.groupDescriptors.descriptors { + totalFreeBlocks += uint64(gd.freeBlocks) + } + fs.superblock.freeBlocks = totalFreeBlocks + if err := fs.writeSuperblock(); err != nil { + return nil, fmt.Errorf("error writing Superblock: %v", err) + } // there is nothing in there - return &FileSystem{ - bootSector: []byte{}, - superblock: &sb, - groupDescriptors: &gdt, - blockGroups: blockGroups, - size: size, - start: start, - backend: fsBackend, - }, nil + return fs, nil } // Read reads a filesystem from a given disk. 
@@ -696,14 +732,23 @@ func Read(b backend.Storage, size, start, sectorsize int64) (*FileSystem, error) return nil, fmt.Errorf("could not interpret Group Descriptor Table data: %v", err) } + // which blocks have superblock and GDT? + // 0 - primary + // ?? - backups + backupSuperblocks := []int64{0} + for _, bg := range sb.backupSuperblockBlockGroups { + backupSuperblocks = append(backupSuperblocks, int64(bg*sb.blocksPerGroup)) + } + return &FileSystem{ - bootSector: bs, - superblock: sb, - groupDescriptors: gdt, - blockGroups: int64(sb.blockGroupCount()), - size: size, - start: start, - backend: fsBackend, + bootSector: bs, + superblock: sb, + groupDescriptors: gdt, + blockGroups: int64(sb.blockGroupCount()), + size: size, + start: start, + backend: fsBackend, + backupSuperblocks: backupSuperblocks, }, nil } @@ -982,13 +1027,47 @@ func (fs *FileSystem) OpenFile(p string, flag int) (filesystem.File, error) { return nil, fmt.Errorf("could not read extent tree for inode %d: %v", inodeNumber, err) } return &File{ - directoryEntry: entry, - inode: inode, - isReadWrite: flag&os.O_RDWR != 0, - isAppend: flag&os.O_APPEND != 0, - offset: offset, - filesystem: fs, - extents: extents, + inode: inode, + isReadWrite: flag&os.O_RDWR != 0, + isAppend: flag&os.O_APPEND != 0, + offset: offset, + filesystem: fs, + extents: extents, + filename: filename, + fileType: entry.fileType, + }, nil +} + +// openFileViaInode opens a file given its path and flags, using the inode directly. +// Will not create the file if it does not exist. +// Does not follow symlinks. 
+func (fs *FileSystem) openFileViaInode(inodeNumber uint32, flag int) (filesystem.File, error) { + inode, err := fs.readInode(inodeNumber) + if err != nil { + return nil, fmt.Errorf("could not read inode number %d: %v", inodeNumber, err) + } + + // if a symlink, read the target, rather than the inode itself, which does not point to anything + if inode.fileType == fileTypeSymbolicLink { + return nil, fmt.Errorf("cannot open file via inode: inode %d is a symbolic link", inodeNumber) + } + offset := int64(0) + if flag&os.O_APPEND == os.O_APPEND { + offset = int64(inode.size) + } + // when we open a file, we load the inode but also all of the extents + extents, err := inode.extents.blocks(fs) + if err != nil { + return nil, fmt.Errorf("could not read extent tree for inode %d: %v", inodeNumber, err) + } + return &File{ + inode: inode, + isReadWrite: flag&os.O_RDWR != 0, + isAppend: flag&os.O_APPEND != 0, + offset: offset, + filesystem: fs, + extents: extents, + fileType: directoryFileType(inode.fileType), }, nil } @@ -1588,31 +1667,17 @@ func (fs *FileSystem) mkDirEntry(parent *Directory, name string, isDir bool) (*d // - write inode to disk // create an inode - inodeNumber, err := fs.allocateInode(parent.inode) + inodeNumber, err := fs.allocateInode(parent.inode, 0) if err != nil { return nil, fmt.Errorf("could not allocate inode for file %s: %w", name, err) } - // get extents for the file - prefer in the same block group as the inode, if possible - newExtents, err := fs.allocateExtents(1, nil) - if err != nil { - return nil, fmt.Errorf("could not allocate disk space for file %s: %w", name, err) - } - extentTreeParsed, err := extendExtentTree(nil, newExtents, fs, nil) - if err != nil { - return nil, fmt.Errorf("could not convert extents into tree: %w", err) - } - // normally, after getting a tree from extents, you would need to then allocate all of the blocks - // in the extent tree - leafs and intermediate. 
However, because we are allocating a new directory - // with a single extent, we *know* it can fit in the inode itself (which has a max of 4), so no need // create a directory entry for the file deFileType := dirFileTypeRegular fileType := fileTypeRegularFile - var contentSize uint64 if isDir { deFileType = dirFileTypeDirectory fileType = fileTypeDirectory - contentSize = uint64(fs.superblock.blockSize) } de := directoryEntry{ inode: inodeNumber, @@ -1636,12 +1701,9 @@ func (fs *FileSystem) mkDirEntry(parent *Directory, name string, isDir bool) (*d return nil, fmt.Errorf("could not read parent extents for directory: %w", err) } dirFile := &File{ - inode: parentInode, - directoryEntry: &directoryEntry{ - inode: parent.inode, - filename: name, - fileType: dirFileTypeDirectory, - }, + inode: parentInode, + filename: name, + fileType: dirFileTypeDirectory, filesystem: fs, isReadWrite: true, isAppend: true, @@ -1656,23 +1718,72 @@ func (fs *FileSystem) mkDirEntry(parent *Directory, name string, isDir bool) (*d return nil, fmt.Errorf("wrote only %d bytes instead of expected %d for new directory", wrote, len(parentDirBytes)) } + // normally, after getting a tree from extents, you would need to then allocate all of the blocks + // in the extent tree - leafs and intermediate. 
However, because we are allocating a new directory + // with a single extent, we *know* it can fit in the inode itself (which has a max of 4), so no need + if err := fs.initFile( + inodeNumber, parentInode.number, + fileType, + parentInode.permissionsOwner, parentInode.permissionsGroup, parentInode.permissionsOther, + parentInode.owner, parentInode.group, + ); err != nil { + return nil, fmt.Errorf("could not initialize file %s: %w", name, err) + } + + // return + return &de, nil +} + +func (fs *FileSystem) initFile(inodeNumber, parentInodeNumber uint32, ft fileType, permissionsOwner, permissionsGroup, permissionsOther filePermissions, owner, group uint32) error { // write the inode for the new entry out + // get extents for the file - prefer in the same block group as the inode, if possible + var ( + extentTreeParsed extentBlockFinder + extentsInodeBlockCount uint64 + contentSize uint64 + newExtents *extents + err error + hardLinks uint16 = 1 + ) + if ft == fileTypeDirectory { + newExtents, err = fs.allocateExtents(1, nil) + if err != nil { + return fmt.Errorf("could not allocate disk space: %w", err) + } + extentTreeParsed, err = extendExtentTree(nil, newExtents, fs, nil) + if err != nil { + return fmt.Errorf("could not convert extents into tree: %w", err) + } + contentSize = uint64(fs.superblock.blockSize) + extentsFSBlockCount := newExtents.blockCount() + extentsInodeBlockCount = extentsFSBlockCount * uint64(fs.superblock.blockSize) / 512 + hardLinks = 2 + } else { + // zero-length regular files still need an extent header + extentTreeParsed = extentsBlockFinderFromExtents(nil, fs.superblock.blockSize) + } + // normally, after getting a tree from extents, you would need to then allocate all of the blocks + // in the extent tree - leafs and intermediate. 
However, because we are allocating a new directory + // with a single extent, we *know* it can fit in the inode itself (which has a max of 4), so no need + now := time.Now() in := inode{ - number: inodeNumber, - permissionsGroup: parentInode.permissionsGroup, - permissionsOwner: parentInode.permissionsOwner, - permissionsOther: parentInode.permissionsOther, - fileType: fileType, - owner: parentInode.owner, - group: parentInode.group, - size: contentSize, - hardLinks: 2, - blocks: newExtents.blockCount(), - flags: &inodeFlags{}, + number: inodeNumber, + permissionsGroup: permissionsGroup, + permissionsOwner: permissionsOwner, + permissionsOther: permissionsOther, + fileType: ft, + owner: owner, + group: group, + size: contentSize, + hardLinks: hardLinks, + blocks: extentsInodeBlockCount, + flags: &inodeFlags{ + usesExtents: true, + }, nfsFileVersion: 0, version: 0, - inodeSize: parentInode.inodeSize, + inodeSize: fs.superblock.inodeSize, deletionTime: 0, accessTime: now, changeTime: now, @@ -1684,10 +1795,10 @@ func (fs *FileSystem) mkDirEntry(parent *Directory, name string, isDir bool) (*d } // write the inode to disk if err := fs.writeInode(&in); err != nil { - return nil, fmt.Errorf("could not write inode for new directory: %w", err) + return fmt.Errorf("could not write inode for new file: %w", err) } // if a directory, put entries for . and .. 
in the first block for the new directory - if isDir { + if ft == fileTypeDirectory { initialEntries := []*directoryEntry{ { inode: inodeNumber, @@ -1695,25 +1806,24 @@ func (fs *FileSystem) mkDirEntry(parent *Directory, name string, isDir bool) (*d fileType: dirFileTypeDirectory, }, { - inode: parent.inode, + inode: parentInodeNumber, filename: "..", fileType: dirFileTypeDirectory, }, } newDir := Directory{ - directoryEntry: de, - root: false, - entries: initialEntries, - } - dirBytes := newDir.toBytes(fs.superblock.blockSize, directoryChecksumAppender(fs.superblock.checksumSeed, inodeNumber, 0)) - // write the bytes out to disk - dirFile = &File{ - inode: &in, - directoryEntry: &directoryEntry{ + directoryEntry: directoryEntry{ inode: inodeNumber, - filename: name, fileType: dirFileTypeDirectory, }, + root: false, + entries: initialEntries, + } + dirBytes := newDir.toBytes(fs.superblock.blockSize, directoryChecksumAppender(fs.superblock.checksumSeed, inodeNumber, 0)) + // write the bytes out to disk + dirFile := &File{ + inode: &in, + fileType: dirFileTypeDirectory, filesystem: fs, isReadWrite: true, isAppend: true, @@ -1722,69 +1832,94 @@ func (fs *FileSystem) mkDirEntry(parent *Directory, name string, isDir bool) (*d } wrote, err := dirFile.Write(dirBytes) if err != nil && err != io.EOF { - return nil, fmt.Errorf("unable to write new directory: %w", err) + return fmt.Errorf("unable to write new directory: %w", err) } if wrote != len(dirBytes) { - return nil, fmt.Errorf("wrote only %d bytes instead of expected %d for new entry", wrote, len(dirBytes)) + return fmt.Errorf("wrote only %d bytes instead of expected %d for new entry", wrote, len(dirBytes)) } } // return - return &de, nil + return nil } // allocateInode allocate a single inode // passed the parent, so it can know where to allocate it // logic: +// - requested is non-zero : try to allocate that inode number +// - requested is zero : // - parent is 0 : root inode, will allocate at 2 // - parent is 2 : 
child of root, will try to spread out // - else : try to collocate with parent, if possible -func (fs *FileSystem) allocateInode(parent uint32) (uint32, error) { +func (fs *FileSystem) allocateInode(parent uint32, requested int) (uint32, error) { var ( inodeNumber = -1 + bg int + gd groupDescriptor + bm *bitmap.Bitmap ) - if parent == 0 { + switch { + case requested != 0: + inodeNumber = requested + case parent == 0: inodeNumber = 2 } - // load the inode bitmap - var ( - bg int - gd groupDescriptor - ) writableFile, err := fs.backend.Writable() if err != nil { return 0, err } - for _, gd = range fs.groupDescriptors.descriptors { - if inodeNumber != -1 { - break - } - bg := int(gd.number) - bm, err := fs.readInodeBitmap(bg) + // if a specific inode was requested, then try to get that one + if inodeNumber != -1 { + // try to allocate the requested inode + bg = blockGroupForInode(requested, fs.superblock.inodesPerGroup) + gd = fs.groupDescriptors.descriptors[bg] + bm, err = fs.readInodeBitmap(bg) if err != nil { return 0, fmt.Errorf("could not read inode bitmap: %w", err) } - // get first free inode - inodeNumber = bm.FirstFree(0) - // if we found a - if inodeNumber == -1 { - continue - } - // set it as marked - if err := bm.Set(inodeNumber); err != nil { - return 0, fmt.Errorf("could not set inode bitmap: %w", err) - } - // write the inode bitmap bytes - if err := fs.writeInodeBitmap(bm, bg); err != nil { - return 0, fmt.Errorf("could not write inode bitmap: %w", err) + } else { + for _, gd = range fs.groupDescriptors.descriptors { + if inodeNumber != -1 { + break + } + bg = int(gd.number) + bm, err = fs.readInodeBitmap(bg) + if err != nil { + return 0, fmt.Errorf("could not read inode bitmap: %w", err) + } + // get first free inode, will return -1 if none free + inodeInBG := bm.FirstFree(0) + if inodeInBG != -1 { + inodeNumber = inodeInBG + int(fs.superblock.inodesPerGroup)*bg + break + } } } + + // if we could not find any free inode, return an error if inodeNumber 
== -1 { return 0, errors.New("no free inodes available") } + inodeInBG := inodeNumber - int(fs.superblock.inodesPerGroup)*bg + isSet, err := bm.IsSet(inodeInBG) + if err != nil { + return 0, fmt.Errorf("could not check inode bitmap for requested inode %d: %w", requested, err) + } + if isSet { + return 0, fmt.Errorf("requested inode %d is already in use", inodeNumber) + } + // set it as marked + if err := bm.Set(inodeInBG); err != nil { + return 0, fmt.Errorf("could not set inode bitmap for requested inode %d: %w", inodeNumber, err) + } + // write the inode bitmap bytes + if err := fs.writeInodeBitmap(bm, bg); err != nil { + return 0, fmt.Errorf("could not write inode bitmap for requested inode %d: %w", inodeNumber, err) + } + // reduce number of free inodes in that descriptor in the group descriptor table gd.freeInodes-- @@ -1804,6 +1939,12 @@ func (fs *FileSystem) allocateInode(parent uint32) (uint32, error) { return 0, fmt.Errorf("wrote only %d bytes instead of expected %d for group descriptor of block group %d", wrote, len(gdBytes), bg) } + // update inode count in superblock + fs.superblock.freeInodes-- + if err := fs.writeSuperblock(); err != nil { + return 0, err + } + return uint32(inodeNumber), nil } @@ -1826,6 +1967,7 @@ func (fs *FileSystem) allocateExtents(size uint64, previous *extents) (*extents, } // 3- if needed, allocate new blocks in extents extraBlockCount := required - allocated + newBlocks := extraBlockCount // if we have enough, do not add anything if extraBlockCount <= 0 { return previous, nil @@ -1847,11 +1989,12 @@ func (fs *FileSystem) allocateExtents(size uint64, previous *extents) (*extents, var ( newExtents []extent datablockBitmaps = map[int]*bitmap.Bitmap{} + gdBlockDelta = map[int]int32{} blocksPerGroup = fs.superblock.blocksPerGroup ) var i int64 - for i = 0; i < blockGroupCount && allocated < extraBlockCount; i++ { + for i = 0; i < blockGroupCount && extraBlockCount > 0; i++ { // keep track if we allocated anything in this 
blockgroup // 1- read the GDT for this blockgroup to find the location of the block bitmap // and total free blocks @@ -1871,11 +2014,12 @@ func (fs *FileSystem) allocateExtents(size uint64, previous *extents) (*extents, // create possible extents by size // Step 3: Group contiguous blocks into extents var extents []extent + groupStart := uint64(fs.superblock.firstDataBlock) + uint64(i)*uint64(blocksPerGroup) for _, freeBlock := range blockList { start, length := freeBlock.Position, freeBlock.Count for length > 0 { extentLength := min(length, int(maxBlocksPerExtent)) - extents = append(extents, extent{startingBlock: uint64(start) + uint64(i)*uint64(blocksPerGroup), count: uint16(extentLength)}) + extents = append(extents, extent{startingBlock: uint64(start) + groupStart, count: uint16(extentLength)}) start += extentLength length -= extentLength } @@ -1902,30 +2046,34 @@ func (fs *FileSystem) allocateExtents(size uint64, previous *extents) (*extents, for block := extentToAdd.startingBlock; block < extentToAdd.startingBlock+uint64(extentToAdd.count); block++ { // determine what block group this block is in, and read the bitmap for that blockgroup // the extent lists the absolute block number, but the bitmap is relative to the block group - blockInGroup := block - uint64(i)*uint64(blocksPerGroup) + blockInGroup := block - groupStart if err := bs.Set(int(blockInGroup)); err != nil { - return nil, fmt.Errorf("could not clear block bitmap for block %d: %v", i, err) + return nil, fmt.Errorf("could not set block bitmap for block %d: %v", i, err) } } // do *not* write the bitmap back yet, as we do not yet know if we will be able to fulfill the entire request. 
// instead save it for later datablockBitmaps[int(i)] = bs + gdBlockDelta[int(i)] -= int32(extentToAdd.count) } } if extraBlockCount > 0 { return nil, fmt.Errorf("could not allocate %d blocks", extraBlockCount) } - // write the block bitmaps back to disk + // write the block bitmaps back to disk and update GDT entries for bg, bs := range datablockBitmaps { if err := fs.writeBlockBitmap(bs, bg); err != nil { return nil, fmt.Errorf("could not write block bitmap for block group %d: %v", bg, err) } + if err := fs.incrGDFreeBlocks(bg, gdBlockDelta[bg]); err != nil { + return nil, fmt.Errorf("could not update free block count in GDT for block group %d: %v", bg, err) + } } // need to update the total blocks used/free in superblock - fs.superblock.freeBlocks -= allocated + fs.superblock.freeBlocks -= newBlocks // update the blockBitmapChecksum for any updated block groups in GDT // write updated superblock and GDT to disk if err := fs.writeSuperblock(); err != nil { @@ -1957,8 +2105,8 @@ func (fs *FileSystem) readInodeBitmap(group int) (*bitmap.Bitmap, error) { } // only take bytes corresponding to the number of inodes per group - // create a bitmap - bs := bitmap.New(int(fs.superblock.blockSize) * len(fs.groupDescriptors.descriptors)) + // create a bitmap sized to one block (blockSize bytes = blockSize*8 bits) + bs := bitmap.NewBits(int(fs.superblock.blockSize) * 8) bs.FromBytes(b) return bs, nil } @@ -2003,8 +2151,8 @@ func (fs *FileSystem) readBlockBitmap(group int) (*bitmap.Bitmap, error) { if read != int(fs.superblock.blockSize) { return nil, fmt.Errorf("Read %d bytes instead of expected %d for block bitmap of block group %d", read, fs.superblock.blockSize, gd.number) } - // create a bitmap - bs := bitmap.New(int(fs.superblock.blockSize) * len(fs.groupDescriptors.descriptors)) + // create a bitmap sized to one block (blockSize bytes = blockSize*8 bits) + bs := bitmap.NewBits(int(fs.superblock.blockSize) * 8) bs.FromBytes(b) return bs, nil } @@ -2033,6 +2181,29 @@ func 
(fs *FileSystem) writeBlockBitmap(bm *bitmap.Bitmap, group int) error { return nil } +// incrGDFreeBlocks increment the number of free blocks in the group descriptor for a given block group. +// If count is negative, decrement. +func (fs *FileSystem) incrGDFreeBlocks(group int, count int32) error { + if group >= len(fs.groupDescriptors.descriptors) { + return fmt.Errorf("block group %d does not exist", group) + } + gd := &fs.groupDescriptors.descriptors[group] + switch { + case count > 0: + gd.freeBlocks += uint32(count) + case count < 0: + absCount := uint32(-count) + if gd.freeBlocks < absCount { + return fmt.Errorf("cannot decrement free blocks by %d in block group %d since only %d are free", -count, group, gd.freeBlocks) + } + gd.freeBlocks -= absCount + default: + // no change + } + + return fs.writeGDT() +} + func (fs *FileSystem) writeSuperblock() error { writableFile, err := fs.backend.Writable() if err != nil { @@ -2042,10 +2213,609 @@ func (fs *FileSystem) writeSuperblock() error { if err != nil { return fmt.Errorf("could not convert superblock to bytes: %v", err) } + for _, bg := range fs.backupSuperblocks { + block := bg // backupSuperblocks already contains block numbers, not block group numbers + blockStart := block * int64(fs.superblock.blockSize) + // allow that the first one requires an offset + incr := int64(0) + if block == 0 { + incr = int64(SectorSize512) * 2 + } + + // write the superblock + count, err := writableFile.WriteAt(superblockBytes, incr+blockStart) + if err != nil { + return fmt.Errorf("error writing Superblock for block %d to disk: %v", block, err) + } + if count != int(SuperblockSize) { + return fmt.Errorf("wrote %d bytes of Superblock for block %d to disk instead of expected %d", count, block, SuperblockSize) + } + } + _, err = writableFile.WriteAt(superblockBytes, int64(BootSectorSize)) return err } +// writeGDT writes the GDT to the backing store, primary and all backups. 
+func (fs *FileSystem) writeGDT() error { + writableFile, err := fs.backend.Writable() + if err != nil { + return err + } + gdSize := fs.superblock.groupDescriptorSize // size of a single group descriptor + if fs.superblock.features.fs64Bit { + gdSize = groupDescriptorSize64Bit + } + // now calculate how many there should be in total + gdtSize := uint64(gdSize) * fs.superblock.blockGroupCount() + gdt := fs.groupDescriptors + g := gdt.toBytes(fs.superblock.gdtChecksumType(), fs.superblock.checksumSeed) + + for _, bg := range fs.backupSuperblocks { + block := bg // backupSuperblocks already contains block numbers, not block group numbers + blockStart := block * int64(fs.superblock.blockSize) + // allow that the first one requires an offset + incr := int64(0) + if block == 0 { + incr = int64(SectorSize512) * 2 + } + + // write the GDT + count, err := writableFile.WriteAt(g, incr+blockStart+int64(SuperblockSize)) + if err != nil { + return fmt.Errorf("error writing GDT for block %d to disk: %v", block, err) + } + if count != int(gdtSize) { + return fmt.Errorf("wrote %d bytes of GDT for block %d to disk instead of expected %d", count, block, gdtSize) + } + } + + return nil +} + +func (fs *FileSystem) initJournal() error { + writable, err := fs.backend.Writable() + if err != nil { + return err + } + // Allocate blocks for the journal + // Standard journal size is 32MB to 128MB, but scale to filesystem size + // If filesystem is smaller, use a fraction of it + journalBlocks := max(1024, min(102400, fs.superblock.blockCount/32)) + journalSize := journalBlocks * uint64(fs.superblock.blockSize) + // Cap between reasonable limits + if journalSize > uint64(journalMaxSize) { + journalSize = uint64(journalMaxSize) + } + if journalSize < uint64(journalMinSize) { + journalSize = uint64(journalMinSize) + } + + // Allocate the blocks for the journal + journalExtents, err := fs.allocateExtents(journalSize, nil) + if err != nil { + return err + } + + // Create the journal inode + 
extentTreeParsed, err := createRootExtentTree(journalExtents, &FileSystem{ + superblock: fs.superblock, + }) + if err != nil { + return fmt.Errorf("could not create extent tree for journal: %w", err) + } + + // ensure we use the right block structure. + // inode works in 512-byte blocks consistently + journalFSBlockCount := journalExtents.blockCount() + journalInodeBlockCount := journalFSBlockCount * uint64(fs.superblock.blockSize) / 512 + + // We do not need to mark inode 8 as used in the inode bitmap + // since we marked all below "first NonReservedInode" as used already + now := time.Now() + journalInodeStruct := &inode{ + number: journalInode, + permissionsGroup: filePermissions{read: true, write: true}, + permissionsOwner: filePermissions{read: true, write: true}, + permissionsOther: filePermissions{}, + fileType: fileTypeRegularFile, + owner: 0, + group: 0, + size: journalSize, + hardLinks: 1, + blocks: journalInodeBlockCount, + flags: &inodeFlags{ + usesExtents: true, + }, + nfsFileVersion: 0, + version: 0, + inodeSize: uint16(DefaultInodeSize), + deletionTime: 0, + accessTime: now, + changeTime: now, + createTime: now, + modifyTime: now, + extendedAttributeBlock: 0, + project: 0, + extents: extentTreeParsed, + } + if err := fs.writeInode(journalInodeStruct); err != nil { + return fmt.Errorf("could not write inode for journal: %w", err) + } + + // Populate the journal file with a valid jbd2 journal superblock + // Create a journal superblock + journalSuperblock := NewJournalSuperblock(fs.superblock.blockSize, uint32(journalBlocks)) + // Set the UUID to match the filesystem UUID + if fs.superblock.uuid != nil { + journalSuperblock.uuid = fs.superblock.uuid + } + + // Serialize the journal superblock + journalSuperblockBytes, err := journalSuperblock.ToBytes() + if err != nil { + return fmt.Errorf("could not serialize journal superblock: %w", err) + } + + // Write the journal superblock at the beginning of the first journal block + // The journal starts at the 
first extent's starting block + if len(*journalExtents) > 0 { + firstJournalBlock := (*journalExtents)[0].startingBlock + journalOffset := int64(firstJournalBlock * uint64(fs.superblock.blockSize)) + + // Write the journal superblock + n, err := writable.WriteAt(journalSuperblockBytes, journalOffset) + if err != nil { + return fmt.Errorf("could not write journal superblock: %w", err) + } + if n != len(journalSuperblockBytes) { + return fmt.Errorf("wrote %d bytes of journal superblock instead of expected %d", n, len(journalSuperblockBytes)) + } + + // Zero out the rest of the journal blocks to ensure they're empty + // Start from the block after the superblock + remainingOffset := journalOffset + int64(JournalSuperblockSize) + remainingSize := int64(journalSize) - int64(JournalSuperblockSize) + + if remainingSize > 0 { + // Write in chunks to avoid allocating too much memory at once + chunkSize := 1024 * 1024 // 1MB chunks + zeros := make([]byte, min(chunkSize, int(remainingSize))) + for written := int64(0); written < remainingSize; { + toWrite := min(len(zeros), int(remainingSize-written)) + n, err := writable.WriteAt(zeros[:toWrite], remainingOffset+written) + if err != nil { + return fmt.Errorf("could not zero journal blocks: %w", err) + } + written += int64(n) + } + } + } + + // Store journal backup in superblock + if len(*journalExtents) > 0 { + var journalBackupData = &journalBackup{} + for i := 0; i < 15 && i < len(*journalExtents); i++ { + journalBackupData.iBlocks[i] = uint32((*journalExtents)[i].startingBlock) + } + journalBackupData.iSize = journalSize + fs.superblock.journalBackup = journalBackupData + + if err := fs.writeSuperblock(); err != nil { + return fmt.Errorf("could not update superblock with journal backup info: %w", err) + } + } + return nil +} + +func setBitmapOrErr(bm *bitmap.Bitmap, location int, context string) error { + if err := bm.Set(location); err != nil { + return fmt.Errorf("%s: %w", context, err) + } + return nil +} + +func (fs 
*FileSystem) buildBlockBitmapForGroup(i int, gd *groupDescriptor, groupCount uint64) (*bitmap.Bitmap, error) { + blocksPerGroup := uint64(fs.superblock.blocksPerGroup) + groupStart := uint64(fs.superblock.firstDataBlock) + uint64(i)*blocksPerGroup + remaining := fs.superblock.blockCount - groupStart + blocksInGroup := blocksPerGroup + if remaining < blocksPerGroup { + blocksInGroup = remaining + } + blockBitmapSize := int(blocksInGroup) + blockBitmapBlocks := (blockBitmapSize + int(fs.superblock.blockSize)*8 - 1) / (int(fs.superblock.blockSize) * 8) + blockBitmapSize = blockBitmapBlocks * int(fs.superblock.blockSize) * 8 + blockBitmap := bitmap.NewBits(blockBitmapSize) + if err := fs.markBlockBitmapPadding(blockBitmap, i, blocksInGroup, blockBitmapSize); err != nil { + return nil, err + } + if err := fs.markSuperBackupMetadata(blockBitmap, i, groupCount); err != nil { + return nil, err + } + + firstBlockOfGroup := groupStart + if fs.superblock.features.flexBlockGroups { + if err := fs.markFlexMetadataBlocks(blockBitmap, i, firstBlockOfGroup); err != nil { + return nil, err + } + return blockBitmap, nil + } + if err := fs.markNonFlexMetadataBlocks(blockBitmap, i, gd, firstBlockOfGroup); err != nil { + return nil, err + } + + return blockBitmap, nil +} + +func (fs *FileSystem) markBlockBitmapPadding(blockBitmap *bitmap.Bitmap, groupIndex int, blocksInGroup uint64, blockBitmapSize int) error { + for j := int(blocksInGroup); j < blockBitmapSize; j++ { + if err := setBitmapOrErr(blockBitmap, j, fmt.Sprintf("group %d block bitmap padding", groupIndex)); err != nil { + return err + } + } + return nil +} + +func (fs *FileSystem) markSuperBackupMetadata(blockBitmap *bitmap.Bitmap, groupIndex int, groupCount uint64) error { + // Check if this group has superblock backup + hasSuperBackup := false + firstMetaBG := fs.superblock.firstMetablockGroup + switch { + case groupIndex == 0 || groupIndex == 1: + hasSuperBackup = true + case firstMetaBG > 0: + hasSuperBackup = 
uint64(groupIndex) >= uint64(firstMetaBG) && (uint64(groupIndex)%uint64(firstMetaBG)) == 0 + default: + hasSuperBackup = checkSuperBackup(uint64(groupIndex)) + } + + metaBlocks := uint64(0) + if hasSuperBackup { + gdtBlocks := (groupCount*uint64(fs.superblock.groupDescriptorSize) + uint64(fs.superblock.blockSize) - 1) / uint64(fs.superblock.blockSize) + metaBlocks = 1 + gdtBlocks + uint64(fs.superblock.reservedGDTBlocks) + } + // Mark superblock and GDT blocks as used + for j := uint64(0); j < metaBlocks; j++ { + if err := setBitmapOrErr(blockBitmap, int(j), fmt.Sprintf("group %d metadata", groupIndex)); err != nil { + return err + } + } + return nil +} + +func (fs *FileSystem) markFlexMetadataBlocks(blockBitmap *bitmap.Bitmap, groupIndex int, firstBlockOfGroup uint64) error { + // For flex_bg, we need to mark metadata from ALL groups in the flex group + // that are stored in this group's block range + flexSize := int(fs.superblock.logGroupsPerFlex) + myFlex := groupIndex / flexSize + // Iterate through all groups and mark their metadata if it falls in this group's range + for j, otherGd := range fs.groupDescriptors.descriptors { + if j/flexSize != myFlex { + continue + } + + // Check if block bitmap is in this group's range + if otherGd.blockBitmapLocation >= firstBlockOfGroup && + otherGd.blockBitmapLocation < firstBlockOfGroup+uint64(fs.superblock.blocksPerGroup) { + blockOffset := otherGd.blockBitmapLocation - firstBlockOfGroup + if err := setBitmapOrErr(blockBitmap, int(blockOffset), fmt.Sprintf("group %d block bitmap", groupIndex)); err != nil { + return err + } + } + + // Check if inode bitmap is in this group's range + if otherGd.inodeBitmapLocation >= firstBlockOfGroup && + otherGd.inodeBitmapLocation < firstBlockOfGroup+uint64(fs.superblock.blocksPerGroup) { + blockOffset := otherGd.inodeBitmapLocation - firstBlockOfGroup + if err := setBitmapOrErr(blockBitmap, int(blockOffset), fmt.Sprintf("group %d inode bitmap", groupIndex)); err != nil { + return err 
+ } + } + + inodeTableBlocks := groupDescriptorInodeTableBlocks(j, fs.superblock) + // Check if inode table is in this group's range + inodeTableStart := otherGd.inodeTableLocation + inodeTableEnd := inodeTableStart + inodeTableBlocks + + // Mark all blocks of the inode table that fall in this group's range + for block := inodeTableStart; block < inodeTableEnd; block++ { + if block >= firstBlockOfGroup && block < firstBlockOfGroup+uint64(fs.superblock.blocksPerGroup) { + blockOffset := block - firstBlockOfGroup + if err := setBitmapOrErr(blockBitmap, int(blockOffset), fmt.Sprintf("group %d inode table", groupIndex)); err != nil { + return err + } + } + } + } + return nil +} + +func (fs *FileSystem) markNonFlexMetadataBlocks(blockBitmap *bitmap.Bitmap, groupIndex int, gd *groupDescriptor, firstBlockOfGroup uint64) error { + // Non-flex_bg: only mark this group's own metadata + // Mark bitmap blocks and inode table blocks as used + // Block bitmap, inode bitmap, and inode table locations are relative to group start + blockBitmapBlock := gd.blockBitmapLocation - firstBlockOfGroup + inodeBitmapBlock := gd.inodeBitmapLocation - firstBlockOfGroup + inodeTableBlock := gd.inodeTableLocation - firstBlockOfGroup + + // Mark block bitmap block + if blockBitmapBlock < uint64(fs.superblock.blocksPerGroup) { + if err := setBitmapOrErr(blockBitmap, int(blockBitmapBlock), fmt.Sprintf("group %d block bitmap", groupIndex)); err != nil { + return err + } + } + + // Mark inode bitmap block + if inodeBitmapBlock < uint64(fs.superblock.blocksPerGroup) { + if err := setBitmapOrErr(blockBitmap, int(inodeBitmapBlock), fmt.Sprintf("group %d inode bitmap", groupIndex)); err != nil { + return err + } + } + + // Mark inode table blocks + inodeTableBlocks := groupDescriptorInodeTableBlocks(groupIndex, fs.superblock) + for j := uint64(0); j < inodeTableBlocks; j++ { + if inodeTableBlock+j < uint64(fs.superblock.blocksPerGroup) { + if err := setBitmapOrErr(blockBitmap, int(inodeTableBlock+j), 
fmt.Sprintf("group %d inode table", groupIndex)); err != nil { + return err + } + } + } + + return nil +} + +func (fs *FileSystem) buildInodeBitmapForGroup(i int) (*bitmap.Bitmap, error) { + // Initialize inode bitmap - all inodes free initially + // the size of the bitmap should match the number of inodes per group + // but padded (with 1s) to the nearest block size + inodeBitmapSize := int(fs.superblock.inodesPerGroup) + inodeBitmapBlocks := (inodeBitmapSize + int(fs.superblock.blockSize)*8 - 1) / (int(fs.superblock.blockSize) * 8) + inodeBitmapSize = inodeBitmapBlocks * int(fs.superblock.blockSize) * 8 + inodeBitmap := bitmap.NewBits(inodeBitmapSize) + // set 1 padding on anything past inodesPerGroup + for j := int(fs.superblock.inodesPerGroup); j < inodeBitmapSize; j++ { + if err := setBitmapOrErr(inodeBitmap, j, fmt.Sprintf("group %d inode bitmap padding", i)); err != nil { + return nil, err + } + } + + // Mark reserved inodes as used (inodes 1-10 are reserved, 11 onwards are available) + if i == 0 { + // First group has reserved inodes (1-10) + // Note: lostFoundInode (11) is NOT reserved, it's created as a directory + for j := 1; j < int(firstNonReservedInode); j++ { + if j < int(fs.superblock.inodesPerGroup) { + if err := setBitmapOrErr(inodeBitmap, j-1, fmt.Sprintf("group %d reserved inode %d", i, j)); err != nil { + return nil, err + } + } + } + } + + return inodeBitmap, nil +} + +func (fs *FileSystem) initGroupDescriptorTables() error { + writable, err := fs.backend.Writable() + if err != nil { + return err + } + // Initialize and write bitmaps and inode tables for each block group + groupCount := fs.superblock.blockGroupCount() + for i := range fs.groupDescriptors.descriptors { + gd := &fs.groupDescriptors.descriptors[i] + blockBitmap, err := fs.buildBlockBitmapForGroup(i, gd, groupCount) + if err != nil { + return err + } + inodeBitmap, err := fs.buildInodeBitmapForGroup(i) + if err != nil { + return err + } + + // Write block bitmap + blockBitmapBytes 
:= blockBitmap.ToBytes() + blockBitmapOffset := int64(gd.blockBitmapLocation * uint64(fs.superblock.blockSize)) + count, err := writable.WriteAt(blockBitmapBytes, blockBitmapOffset) + if err != nil { + return fmt.Errorf("error writing block bitmap for group %d: %v", i, err) + } + if count != len(blockBitmapBytes) { + return fmt.Errorf("wrote %d bytes of block bitmap for group %d instead of expected %d", count, i, len(blockBitmapBytes)) + } + + // Write inode bitmap + inodeBitmapBytes := inodeBitmap.ToBytes() + inodeBitmapOffset := int64(gd.inodeBitmapLocation * uint64(fs.superblock.blockSize)) + count, err = writable.WriteAt(inodeBitmapBytes, inodeBitmapOffset) + if err != nil { + return fmt.Errorf("error writing inode bitmap for group %d: %v", i, err) + } + if count != len(inodeBitmapBytes) { + return fmt.Errorf("wrote %d bytes of inode bitmap for group %d instead of expected %d", count, i, len(inodeBitmapBytes)) + } + + // Initialize inode table - zero it out + inodeTableBlocks := groupDescriptorInodeTableBlocks(i, fs.superblock) + inodeTableSize := int(inodeTableBlocks * uint64(fs.superblock.blockSize)) + inodeTableBytes := make([]byte, inodeTableSize) + inodeTableOffset := int64(gd.inodeTableLocation * uint64(fs.superblock.blockSize)) + count, err = writable.WriteAt(inodeTableBytes, inodeTableOffset) + if err != nil { + return fmt.Errorf("error writing inode table for group %d: %v", i, err) + } + if count != inodeTableSize { + return fmt.Errorf("wrote %d bytes of inode table for group %d instead of expected %d", count, i, inodeTableSize) + } + } + return nil +} + +func (fs *FileSystem) initResizeInode() error { + now := time.Now() + writable, err := fs.backend.Writable() + if err != nil { + return err + } + + blocksPerGroup := uint64(fs.superblock.blocksPerGroup) + groupCount := fs.superblock.blockGroupCount() + gdtPerBlock := fs.superblock.blockSize / uint32(fs.superblock.groupDescriptorSize) + gdtActiveBlocks := groupCount / uint64(gdtPerBlock) + if 
groupCount%uint64(gdtPerBlock) != 0 { + gdtActiveBlocks++ + } + // Use reserved GDT blocks in group 0 for indirect blocks and backup groups for data. + var ( + blockPointers [15]uint32 + allocatedCount uint64 + ) + + writePointerBlock := func(block uint64, ptrs []uint32) error { + buf := make([]byte, fs.superblock.blockSize) + for i, p := range ptrs { + base := i * 4 + binary.LittleEndian.PutUint32(buf[base:base+4], p) + } + _, err := writable.WriteAt(buf, int64(block)*int64(fs.superblock.blockSize)) + return err + } + + allocateIndirectBlock := func() (uint64, error) { + exts, err := fs.allocateExtents(uint64(fs.superblock.blockSize), nil) + if err != nil { + return 0, err + } + return (*exts)[0].startingBlock, nil + } + + backupGroups := calculateBackupSuperblockGroups(int64(groupCount)) + backupStarts := make([]uint32, 0, len(backupGroups)) + for _, bg := range backupGroups { + if bg == 0 { + continue + } + g := uint64(bg) + groupStart := g * blocksPerGroup + if groupStart >= fs.superblock.blockCount { + continue + } + groupBlocks := blocksPerGroup + remaining := fs.superblock.blockCount - groupStart + if remaining < groupBlocks { + groupBlocks = remaining + } + reservedStart := groupStart + uint64(fs.superblock.firstDataBlock) + 1 + gdtActiveBlocks + if reservedStart >= groupStart+groupBlocks { + continue + } + backupStarts = append(backupStarts, uint32(reservedStart)) + } + if len(backupStarts) == 0 { + return fmt.Errorf("no backup groups available for resize inode data blocks") + } + + // double indirect block + doubleBlock, err := allocateIndirectBlock() + if err != nil { + return fmt.Errorf("could not allocate resize inode double indirect block: %w", err) + } + blockPointers[13] = uint32(doubleBlock) + + var secondLevelBlocks []uint32 + indirectBase := uint64(fs.superblock.firstDataBlock) + 1 + gdtActiveBlocks + indirectLimit := indirectBase + uint64(fs.superblock.reservedGDTBlocks) - 1 + // first indirect block is the last reserved GDT block (offset 255), 
then the rest in order. + lastIndirect := indirectLimit + offset := uint32(fs.superblock.reservedGDTBlocks - 1) + indBlock := lastIndirect + ptrs := make([]uint32, len(backupStarts)) + for i, start := range backupStarts { + ptrs[i] = start + offset + } + if err := writePointerBlock(indBlock, ptrs); err != nil { + return fmt.Errorf("could not write resize inode indirect block: %w", err) + } + secondLevelBlocks = append(secondLevelBlocks, uint32(indBlock)) + + for offset = 0; indirectBase < indirectLimit; offset++ { + indBlock = indirectBase + indirectBase++ + ptrs := make([]uint32, len(backupStarts)) + for i, start := range backupStarts { + ptrs[i] = start + offset + } + if err := writePointerBlock(indBlock, ptrs); err != nil { + return fmt.Errorf("could not write resize inode indirect block: %w", err) + } + secondLevelBlocks = append(secondLevelBlocks, uint32(indBlock)) + } + if err := writePointerBlock(doubleBlock, secondLevelBlocks); err != nil { + return fmt.Errorf("could not write resize inode double indirect block: %w", err) + } + + dataBlocks := uint64(len(backupStarts)) * uint64(fs.superblock.reservedGDTBlocks) + indirectBlocks := uint64(fs.superblock.reservedGDTBlocks) + 1 // 256 second-level + double + allocatedCount = dataBlocks + indirectBlocks + + flexGroups := fs.superblock.logGroupsPerFlex + sizeBlocks := blocksPerGroup*flexGroups + uint64(fs.superblock.reservedGDTBlocks) + 12 + sizeBytes := sizeBlocks * uint64(fs.superblock.blockSize) + allocatedBlocks := allocatedCount * uint64(fs.superblock.blockSize) / 512 + in := inode{ + number: groupDescriptorsInode, + permissionsOwner: filePermissions{read: true, write: true}, + permissionsGroup: filePermissions{}, + permissionsOther: filePermissions{}, + fileType: fileTypeRegularFile, + owner: 0, + group: 0, + size: sizeBytes, + hardLinks: 1, + blocks: allocatedBlocks, + flags: &inodeFlags{ + usesExtents: false, + }, + nfsFileVersion: 0, + version: 0, + inodeSize: fs.superblock.inodeSize, + deletionTime: 0, + 
accessTime: now, + changeTime: now, + createTime: now, + modifyTime: now, + extendedAttributeBlock: 0, + project: 0, + blockPointers: blockPointers, + } + // write the inode to disk + return fs.writeInode(&in) +} + +func calculateGDTBytes(gdt groupDescriptors, superblockCount int, checksumType gdtChecksumType, hashSeed uint32) uint64 { + singleTable := gdt.toBytes(checksumType, hashSeed) + return uint64(len(singleTable)) * uint64(superblockCount) +} + +func groupDescriptorInodeTableBlocks(index int, sb *superblock) uint64 { + start := uint64(index) * uint64(sb.inodesPerGroup) + + if start >= uint64(sb.inodeCount) { + return 0 + } + + remaining := uint64(sb.inodeCount) - start + actual := uint64(sb.inodesPerGroup) + if remaining < actual { + actual = remaining + } + + return (actual*uint64(sb.inodeSize) + uint64(sb.blockSize) - 1) / + uint64(sb.blockSize) +} + func blockGroupForInode(inodeNumber int, inodesPerGroup uint32) int { return (inodeNumber - 1) / int(inodesPerGroup) } @@ -2067,7 +2837,7 @@ func buildGroupDescriptorsFromSuperblock(sb *superblock) groupDescriptors { useFlexBg := sb.features.flexBlockGroups flexSize := uint64(1) if useFlexBg { - flexSize = 1 << sb.logGroupsPerFlex + flexSize = sb.logGroupsPerFlex } descs := make([]groupDescriptor, groups) @@ -2077,7 +2847,7 @@ func buildGroupDescriptorsFromSuperblock(sb *superblock) groupDescriptors { d.number = uint16(g) d.size = descSize - firstBlockOfGroup := uint64(g) * blocksPerGroup + firstBlockOfGroup := uint64(sb.firstDataBlock) + uint64(g)*blocksPerGroup // Determine if this group holds a SB+GDT backup. 
hasSuperBackup := false if useMetaBg { @@ -2092,20 +2862,46 @@ func buildGroupDescriptorsFromSuperblock(sb *superblock) groupDescriptors { gdtBlocks := (uint64(groups)*uint64(descSize) + uint64(sb.blockSize) - 1) / uint64(sb.blockSize) - metaBlocks = 1 + gdtBlocks + metaBlocks = 1 + gdtBlocks + uint64(sb.reservedGDTBlocks) } // flex_bg owner group flexOwner := (uint64(g) / flexSize) * flexSize + // Calculate metadata blocks for the flex owner + flexOwnerMetaBlocks := uint64(0) + if useFlexBg { + flexOwnerHasSuperBackup := false + if useMetaBg { + flexOwnerHasSuperBackup = flexOwner >= firstMetaBg && (flexOwner%firstMetaBg) == 0 + } else { + flexOwnerHasSuperBackup = checkSuperBackup(flexOwner) + } + if flexOwnerHasSuperBackup { + gdtBlocks := + (uint64(groups)*uint64(descSize) + uint64(sb.blockSize) - 1) / + uint64(sb.blockSize) + flexOwnerMetaBlocks = 1 + gdtBlocks + uint64(sb.reservedGDTBlocks) + } + } + // Base block numbers - bitmapBase := flexOwner*blocksPerGroup + metaBlocks + // When there's a superblock backup in the flex owner, metadata includes: + // - 1 block for superblock (or reserved space when firstDataBlock > 0) + // - gdtBlocks for GDT + // Account for firstDataBlock offset when blocksize == 1024 if useFlexBg { - // all groups in a flex share the same bitmap/table set - d.blockBitmapLocation = bitmapBase - d.inodeBitmapLocation = bitmapBase + 1 - d.inodeTableLocation = bitmapBase + 2 + flexOwnerStart := uint64(sb.firstDataBlock) + flexOwner*blocksPerGroup + bitmapBase := flexOwnerStart + flexOwnerMetaBlocks + perGroupMeta := uint64(2) + inodeTableBlocks + groupInFlex := uint64(g) - flexOwner + + base := bitmapBase + groupInFlex*perGroupMeta + + d.blockBitmapLocation = base + d.inodeBitmapLocation = base + 1 + d.inodeTableLocation = base + 2 } else { d.blockBitmapLocation = firstBlockOfGroup + metaBlocks d.inodeBitmapLocation = d.blockBitmapLocation + 1 @@ -2113,15 +2909,33 @@ func buildGroupDescriptorsFromSuperblock(sb *superblock) groupDescriptors 
{ } // Free blocks accounting + // Last group may be partial, so compute actual blocks in group + groupStart := uint64(sb.firstDataBlock) + uint64(g)*blocksPerGroup + remaining := sb.blockCount - groupStart + blocksInGroup := blocksPerGroup + if remaining < blocksPerGroup { + blocksInGroup = remaining + } overhead := metaBlocks - if !useFlexBg || uint64(g) == flexOwner { - overhead += 1 + 1 + inodeTableBlocks + if useFlexBg { + if uint64(g) == flexOwner { + // how many groups actually exist in this flex (last flex may be partial) + remaining := uint64(groups) - flexOwner + groupsInFlex := flexSize + if remaining < groupsInFlex { + groupsInFlex = remaining + } + perGroupMeta := uint64(2) + inodeTableBlocks + overhead += groupsInFlex * perGroupMeta + } + } else { + overhead += 2 + inodeTableBlocks } - if overhead > blocksPerGroup { - overhead = blocksPerGroup + if overhead > blocksInGroup { + overhead = blocksInGroup } - d.freeBlocks = uint32(blocksPerGroup - overhead) + d.freeBlocks = uint32(blocksInGroup - overhead) d.freeInodes = inodesPerGroup d.usedDirectories = 0 d.flags = blockGroupFlags{} @@ -2137,7 +2951,7 @@ func buildGroupDescriptorsFromSuperblock(sb *superblock) groupDescriptors { } func checkSuperBackup(g uint64) bool { - if g == 0 { + if g == 0 || g == 1 { return true } for _, n := range []uint64{3, 5, 7} { @@ -2149,6 +2963,7 @@ func checkSuperBackup(g uint64) bool { } return false } + func validatePath(name string) error { if !iofs.ValidPath(name) { return iofs.ErrInvalid diff --git a/filesystem/ext4/ext4_test.go b/filesystem/ext4/ext4_test.go index af36fc2..9c7f74f 100644 --- a/filesystem/ext4/ext4_test.go +++ b/filesystem/ext4/ext4_test.go @@ -8,6 +8,7 @@ import ( "io" iofs "io/fs" "os" + "os/exec" "path" "path/filepath" "slices" @@ -217,10 +218,10 @@ func testCreateImgCopyFrom(t *testing.T, src string) string { return outfile } -func testCreateEmptyFile(t *testing.T, size int64) *os.File { +func testCreateEmptyFile(t *testing.T, size int64) (outfile 
string, f *os.File) { t.Helper() dir := t.TempDir() - outfile := filepath.Join(dir, "ext4.img") + outfile = filepath.Join(dir, "ext4.img") f, err := os.Create(outfile) if err != nil { t.Fatalf("Error creating empty image file: %v", err) @@ -231,7 +232,7 @@ func testCreateEmptyFile(t *testing.T, size int64) *os.File { if err != nil { t.Fatalf("Error truncating image file: %v", err) } - return f + return outfile, f } func TestWriteFile(t *testing.T) { @@ -528,7 +529,7 @@ func TestMkdir(t *testing.T) { } func TestCreate(t *testing.T) { - f := testCreateEmptyFile(t, 100*MB) + outfile, f := testCreateEmptyFile(t, 100*MB) fs, err := Create(file.New(f, false), 100*MB, 0, 512, &Params{}) if err != nil { t.Fatalf("Error creating ext4 filesystem: %v", err) @@ -536,6 +537,19 @@ func TestCreate(t *testing.T) { if fs == nil { t.Fatalf("Expected non-nil filesystem after creation") } + // Sync the file to disk before running e2fsck + if err := f.Sync(); err != nil { + t.Fatalf("Error syncing file: %v", err) + } + // check that the filesystem is valid using external tools + cmd := exec.Command("e2fsck", "-f", "-n", "-vv", outfile) + stdout := bytes.NewBuffer(nil) + stderr := bytes.NewBuffer(nil) + cmd.Stdout = stdout + cmd.Stderr = stderr + if err := cmd.Run(); err != nil { + t.Fatalf("e2fsck failed: %v,\nstdout:\n%s,\n\nstderr:\n%s", err, stdout.String(), stderr.String()) + } } func TestChtimes(t *testing.T) { diff --git a/filesystem/ext4/extent.go b/filesystem/ext4/extent.go index c107c4c..06c3bd2 100644 --- a/filesystem/ext4/extent.go +++ b/filesystem/ext4/extent.go @@ -39,7 +39,9 @@ func (e *extent) equal(a *extent) bool { return *e == *a } -// blockCount how many blocks are covered in the extents +// blockCount how many filesystem blocks are covered in the extents. +// Remember that these are filesystem blocks, which can vary, not the fixed 512-byte sectors on disk, +// often used in superblock or inode in various places. 
// //nolint:unused // useful function for future func (e extents) blockCount() uint64 { @@ -669,12 +671,11 @@ func splitInternalNode(node *extentInternalNode, newChild *extentChildPtr, fs *F } func writeNodeToDisk(node extentBlockFinder, fs *FileSystem, parent *extentInternalNode) error { - var blockNumber uint64 - if parent != nil { - blockNumber = getBlockNumberFromNode(node, parent) - } else { - blockNumber = getNewBlockNumber(fs) + // Root nodes live in the inode; only write when there's a parent block. + if parent == nil { + return nil } + blockNumber := getBlockNumberFromNode(node, parent) if blockNumber == 0 { return fmt.Errorf("block number not found for node") @@ -736,3 +737,15 @@ func loadChildNode(childPtr *extentChildPtr, fs *FileSystem) (extentBlockFinder, // Implement the logic to decode the node from the data return node, nil } + +func extentsBlockFinderFromExtents(exts extents, blocksize uint32) extentBlockFinder { + return &extentLeafNode{ + extentNodeHeader: extentNodeHeader{ + depth: 0, + entries: uint16(len(exts)), + max: 4, // assuming max 4 for leaf nodes in inode + blockSize: blocksize, + }, + extents: exts, + } +} diff --git a/filesystem/ext4/features.go b/filesystem/ext4/features.go index 9a8baa9..c1035e6 100644 --- a/filesystem/ext4/features.go +++ b/filesystem/ext4/features.go @@ -242,14 +242,16 @@ func (f *featureFlags) toInts() (compatFlags, incompatFlags, roCompatFlags uint3 features = has_journal,extent,huge_file,flex_bg,uninit_bg,64bit,dir_nlink,extra_isize */ var defaultFeatureFlags = featureFlags{ - largeFile: true, - hugeFile: true, - sparseSuperblock: true, - flexBlockGroups: true, - hasJournal: true, - extents: true, - fs64Bit: true, - extendedAttributes: true, + largeFile: true, + hugeFile: true, + sparseSuperblock: true, + flexBlockGroups: true, + hasJournal: true, + extents: true, + fs64Bit: true, + extendedAttributes: true, + directoryEntriesRecordFileType: true, + reservedGDTBlocksForExpansion: true, } type FeatureOpt 
func(*featureFlags) diff --git a/filesystem/ext4/file.go b/filesystem/ext4/file.go index 876ade4..85ec6f1 100644 --- a/filesystem/ext4/file.go +++ b/filesystem/ext4/file.go @@ -13,13 +13,14 @@ var _ fs.File = (*File)(nil) // File represents a single file in an ext4 filesystem type File struct { - *directoryEntry *inode isReadWrite bool isAppend bool offset int64 filesystem *FileSystem extents extents + fileType directoryFileType + filename string } // Read reads up to len(b) bytes from the File. @@ -124,9 +125,9 @@ func (fl *File) Write(b []byte) (int, error) { if fl.size%blocksize > 0 { newBlockCount++ } - blocksNeeded := newBlockCount - blockCount - bytesNeeded := blocksNeeded * blocksize if newBlockCount > blockCount { + blocksNeeded := newBlockCount - blockCount + bytesNeeded := blocksNeeded * blocksize newExtents, err := fl.filesystem.allocateExtents(bytesNeeded, &fl.extents) if err != nil { return 0, fmt.Errorf("could not allocate disk space for file %w", err) @@ -136,6 +137,11 @@ func (fl *File) Write(b []byte) (int, error) { return 0, fmt.Errorf("could not convert extents into tree: %w", err) } fl.inode.extents = extentTreeParsed + updatedExtents, err := fl.inode.extents.blocks(fl.filesystem) + if err != nil { + return 0, fmt.Errorf("could not read updated extents: %w", err) + } + fl.extents = updatedExtents fl.blocks = newBlockCount } @@ -225,7 +231,7 @@ func (fl *File) Stat() (fs.FileInfo, error) { modTime: fl.modifyTime, name: fl.filename, size: int64(fl.size), - isDir: fl.directoryEntry.fileType == dirFileTypeDirectory, + isDir: fl.fileType == dirFileTypeDirectory, mode: fl.permissionsToMode(), sys: &StatT{ UID: fl.owner, diff --git a/filesystem/ext4/groupdescriptors.go b/filesystem/ext4/groupdescriptors.go index ced08f5..70493ac 100644 --- a/filesystem/ext4/groupdescriptors.go +++ b/filesystem/ext4/groupdescriptors.go @@ -122,7 +122,7 @@ func (gds *groupDescriptors) toBytes(checksumType gdtChecksumType, hashSeed uint } // byFreeBlocks provides a sorted 
list of groupDescriptors by free blocks, descending. -// If you want them ascending, sort if. +// If you want them ascending, sort it. func (gds *groupDescriptors) byFreeBlocks() []groupDescriptor { // make a copy of the slice gdSlice := make([]groupDescriptor, len(gds.descriptors)) diff --git a/filesystem/ext4/inode.go b/filesystem/ext4/inode.go index 4ec4d77..29b59a4 100644 --- a/filesystem/ext4/inode.go +++ b/filesystem/ext4/inode.go @@ -136,6 +136,7 @@ type inode struct { inodeSize uint16 project uint32 extents extentBlockFinder + blockPointers [15]uint32 linkTarget string } @@ -263,7 +264,7 @@ func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { ) if fileType == fileTypeSymbolicLink && fileSizeNum < 60 { linkTarget = string(extentInfo[:fileSizeNum]) - } else { + } else if flags.usesExtents { // parse the extent information in the inode to get the root of the extents tree // we do not walk the entire tree, to get a slice of blocks for the file. // If we want to do that, we call the extentBlockFinder.blocks() method @@ -273,6 +274,14 @@ func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { } } + var blockPointers [15]uint32 + if !flags.usesExtents && (fileType != fileTypeSymbolicLink || fileSizeNum >= 60) { + for i := 0; i < 15; i++ { + offset := i * 4 + blockPointers[i] = binary.LittleEndian.Uint32(extentInfo[offset : offset+4]) + } + } + i := inode{ number: number, permissionsGroup: parseGroupPermissions(mode), @@ -297,6 +306,7 @@ func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { extendedAttributeBlock: binary.LittleEndian.Uint64(extendedAttributeBlock), project: binary.LittleEndian.Uint32(b[0x9c:0x100]), extents: allExtents, + blockPointers: blockPointers, linkTarget: linkTarget, } checksum := binary.LittleEndian.Uint32(checksumBytes) @@ -369,7 +379,14 @@ func (i *inode) toBytes(sb *superblock) []byte { copy(b[0x1c:0x20], blocks[0:4]) binary.LittleEndian.PutUint32(b[0x20:0x24], 
i.flags.toInt()) copy(b[0x24:0x28], version[0:4]) - copy(b[0x28:0x64], i.extents.toBytes()) + if i.flags != nil && i.flags.usesExtents { + copy(b[0x28:0x64], i.extents.toBytes()) + } else { + for idx, ptr := range i.blockPointers { + base := 0x28 + idx*4 + binary.LittleEndian.PutUint32(b[base:base+4], ptr) + } + } binary.LittleEndian.PutUint32(b[0x64:0x68], i.nfsFileVersion) copy(b[0x68:0x6c], extendedAttributeBlock[0:4]) copy(b[0x6c:0x70], fileSize[4:8]) diff --git a/filesystem/ext4/journal.go b/filesystem/ext4/journal.go new file mode 100644 index 0000000..7b8c3ce --- /dev/null +++ b/filesystem/ext4/journal.go @@ -0,0 +1,736 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" + "time" + + "github.com/diskfs/go-diskfs/filesystem/ext4/crc" + "github.com/google/uuid" +) + +// Journal block types +type journalBlockType uint32 + +const ( + journalBlockTypeDescriptor journalBlockType = 1 + journalBlockTypeCommit journalBlockType = 2 + journalBlockTypeSuperblockV1 journalBlockType = 3 + journalBlockTypeSuperblockV2 journalBlockType = 4 + journalBlockTypeRevoke journalBlockType = 5 + + // Journal magic number + journalMagic uint32 = 0xC03B3998 + + // Checksum types + checksumTypeCRC32 = 1 + checksumTypeMD5 = 2 + checksumTypeSHA1 = 3 + checksumTypeCRC32C = 4 + + // Feature flags for jbd2 journal + jbd2CompatFeatureChecksum uint32 = 0x1 + jbd2IncompatFeatureRevoke uint32 = 0x1 + jbd2IncompatFeature64Bit uint32 = 0x2 + jbd2IncompatFeatureAsyncCommit uint32 = 0x4 + jbd2IncompatFeatureChecksumV2 uint32 = 0x8 + jbd2IncompatFeatureChecksumV3 uint32 = 0x10 + jbd2IncompatFeatureFastCommit uint32 = 0x20 + + // Tag flags + tagFlagEscaped uint16 = 0x1 + tagFlagSameUUID uint16 = 0x2 + tagFlagDeleted uint16 = 0x4 + tagFlagLast uint16 = 0x8 + + // Journal superblock size + JournalSuperblockSize = 1024 +) + +// journalHeader is the common 12-byte header for all journal blocks +type journalHeader struct { + magic uint32 // Should be journalMagic (0xC03B3998) + blockType 
journalBlockType + sequence uint32 +} + +// JournalSuperblock represents the jbd2 journal superblock +type JournalSuperblock struct { + header *journalHeader + blockSize uint32 + maxLen uint32 + first uint32 + sequence uint32 + start uint32 + errno uint32 + compatFeatures uint32 + incompatFeatures uint32 + roCompatFeatures uint32 + uuid *uuid.UUID + nrUsers uint32 + dynsuper uint32 + maxTransaction uint32 + maxTransData uint32 + checksumType byte + maxFCBlocks uint32 + head uint32 + checksum uint32 +} + +// journalBlockTag represents a block tag in a descriptor block (v3 format) +type journalBlockTag struct { + blockNr uint64 // 32-bit lower, 32-bit upper + flags uint32 + checksum uint32 + uuid []byte // 16 bytes, only present if tagFlagSameUUID is not set +} + +// journalBlockTagV2 represents a block tag in v2 format (variable size) +type journalBlockTagV2 struct { + blockNr uint32 // lower 32 bits + checksum uint16 + flags uint16 + blockNrHigh uint32 // upper 32 bits (if 64-bit feature enabled) + uuid []byte // 16 bytes, only present if tagFlagSameUUID is not set +} + +// journalDescriptorBlock represents a descriptor block containing block tags +type journalDescriptorBlock struct { + header *journalHeader + tags []*journalBlockTag + tail *journalBlockTail // If checksum features enabled +} + +// journalBlockTail is appended to descriptor and revoke blocks when checksums are enabled +type journalBlockTail struct { + checksum uint32 +} + +// journalCommitBlock represents a commit block +type journalCommitBlock struct { + header *journalHeader + checksumType byte + checksumSize byte + checksums [32]byte // Space for checksums + commitSec uint64 // Seconds since epoch + commitNsec uint32 // Nanoseconds component + checksumTail *journalBlockTail +} + +// journalRevokeBlock represents a revocation block +type journalRevokeBlock struct { + header *journalHeader + count uint32 + blocks []uint64 // Variable length array of block numbers + tail *journalBlockTail +} + +// 
journalHeader methods + +// journalHeaderFromBytes creates a journalHeader from bytes +func journalHeaderFromBytes(b []byte) (*journalHeader, error) { + if len(b) < 12 { + return nil, fmt.Errorf("cannot read journal header from %d bytes, need at least 12", len(b)) + } + + magic := binary.BigEndian.Uint32(b[0x0:0x4]) + if magic != journalMagic { + return nil, fmt.Errorf("invalid journal magic: 0x%x (expected 0x%x)", magic, journalMagic) + } + + return &journalHeader{ + magic: magic, + blockType: journalBlockType(binary.BigEndian.Uint32(b[0x4:0x8])), + sequence: binary.BigEndian.Uint32(b[0x8:0xc]), + }, nil +} + +// toBytes converts journalHeader to bytes +func (jh *journalHeader) toBytes() []byte { + b := make([]byte, 12) + binary.BigEndian.PutUint32(b[0x0:0x4], jh.magic) + binary.BigEndian.PutUint32(b[0x4:0x8], uint32(jh.blockType)) + binary.BigEndian.PutUint32(b[0x8:0xc], jh.sequence) + return b +} + +// JournalSuperblock methods + +// JournalSuperblockFromBytes creates a JournalSuperblock from bytes +func JournalSuperblockFromBytes(b []byte) (*JournalSuperblock, error) { + if len(b) != JournalSuperblockSize { + return nil, fmt.Errorf("cannot read journal superblock from %d bytes, expected %d", len(b), JournalSuperblockSize) + } + + // Parse the header (first 12 bytes) + header, err := journalHeaderFromBytes(b[0x0:0xc]) + if err != nil { + return nil, fmt.Errorf("invalid journal superblock header: %v", err) + } + + if header.blockType != journalBlockTypeSuperblockV1 && header.blockType != journalBlockTypeSuperblockV2 { + return nil, fmt.Errorf("expected journal superblock type (3 or 4), got %d", header.blockType) + } + + js := &JournalSuperblock{ + header: header, + blockSize: binary.BigEndian.Uint32(b[0xc:0x10]), + maxLen: binary.BigEndian.Uint32(b[0x10:0x14]), + first: binary.BigEndian.Uint32(b[0x14:0x18]), + sequence: binary.BigEndian.Uint32(b[0x18:0x1c]), + start: binary.BigEndian.Uint32(b[0x1c:0x20]), + errno: binary.BigEndian.Uint32(b[0x20:0x24]), + } + + // 
V2 superblock fields + if header.blockType == journalBlockTypeSuperblockV2 { + js.compatFeatures = binary.BigEndian.Uint32(b[0x24:0x28]) + js.incompatFeatures = binary.BigEndian.Uint32(b[0x28:0x2c]) + js.roCompatFeatures = binary.BigEndian.Uint32(b[0x2c:0x30]) + + // UUID (16 bytes) + uuidBytes := make([]byte, 16) + copy(uuidBytes, b[0x30:0x40]) + parsedUUID, err := uuid.FromBytes(uuidBytes) + if err == nil { + js.uuid = &parsedUUID + } + + js.nrUsers = binary.BigEndian.Uint32(b[0x40:0x44]) + js.dynsuper = binary.BigEndian.Uint32(b[0x44:0x48]) + js.maxTransaction = binary.BigEndian.Uint32(b[0x48:0x4c]) + js.maxTransData = binary.BigEndian.Uint32(b[0x4c:0x50]) + js.checksumType = b[0x50] + // 3 bytes padding at 0x51:0x54 + js.maxFCBlocks = binary.BigEndian.Uint32(b[0x54:0x58]) + js.head = binary.BigEndian.Uint32(b[0x58:0x5c]) + // 160 bytes padding at 0x5c:0xfc + js.checksum = binary.BigEndian.Uint32(b[0xfc:0x100]) + } + + return js, nil +} + +// ToBytes converts JournalSuperblock to bytes +func (js *JournalSuperblock) ToBytes() ([]byte, error) { + b := make([]byte, JournalSuperblockSize) + + // Write header + if js.header == nil { + js.header = &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeSuperblockV2, + sequence: 0, + } + } + headerBytes := js.header.toBytes() + copy(b[0x0:0xc], headerBytes) + + // Write basic fields + binary.BigEndian.PutUint32(b[0xc:0x10], js.blockSize) + binary.BigEndian.PutUint32(b[0x10:0x14], js.maxLen) + binary.BigEndian.PutUint32(b[0x14:0x18], js.first) + binary.BigEndian.PutUint32(b[0x18:0x1c], js.sequence) + binary.BigEndian.PutUint32(b[0x1c:0x20], js.start) + binary.BigEndian.PutUint32(b[0x20:0x24], js.errno) + + // V2 superblock fields + binary.BigEndian.PutUint32(b[0x24:0x28], js.compatFeatures) + binary.BigEndian.PutUint32(b[0x28:0x2c], js.incompatFeatures) + binary.BigEndian.PutUint32(b[0x2c:0x30], js.roCompatFeatures) + + // UUID + if js.uuid != nil { + copy(b[0x30:0x40], js.uuid[:]) + } + + 
binary.BigEndian.PutUint32(b[0x40:0x44], js.nrUsers) + binary.BigEndian.PutUint32(b[0x44:0x48], js.dynsuper) + binary.BigEndian.PutUint32(b[0x48:0x4c], js.maxTransaction) + binary.BigEndian.PutUint32(b[0x4c:0x50], js.maxTransData) + b[0x50] = js.checksumType + // 3 bytes padding at 0x51:0x54 + binary.BigEndian.PutUint32(b[0x54:0x58], js.maxFCBlocks) + binary.BigEndian.PutUint32(b[0x58:0x5c], js.head) + // 160 bytes padding at 0x5c:0xfc + + // Calculate and write checksum + switch { + case js.incompatFeatures&jbd2IncompatFeatureChecksumV3 != 0: + // V3 checksum: CRC32C of UUID + superblock up to checksum field + if js.uuid != nil { + binary.BigEndian.PutUint32(b[0xfc:0x100], 0) + checksum := crc.CRC32c(0xffffffff, js.uuid[:]) + checksum = crc.CRC32c(checksum, b[:0xfc]) + binary.BigEndian.PutUint32(b[0xfc:0x100], checksum) + } + case js.compatFeatures&jbd2CompatFeatureChecksum != 0: + // V2 checksum: same calculation + if js.uuid != nil { + binary.BigEndian.PutUint32(b[0xfc:0x100], 0) + checksum := crc.CRC32c(0xffffffff, js.uuid[:]) + checksum = crc.CRC32c(checksum, b[:0xfc]) + binary.BigEndian.PutUint32(b[0xfc:0x100], checksum) + } + default: + binary.BigEndian.PutUint32(b[0xfc:0x100], js.checksum) + } + + // 768 bytes of user IDs at 0x100:0x400 (not used currently) + + return b, nil +} + +// SupportsFeature checks if a given incompatible feature is set +func (js *JournalSuperblock) SupportsFeature(feature uint32) bool { + return js.incompatFeatures&feature != 0 +} + +// SupportsCompatFeature checks if a given compatible feature is set +func (js *JournalSuperblock) SupportsCompatFeature(feature uint32) bool { + return js.compatFeatures&feature != 0 +} + +// HasChecksums returns true if journal maintains checksums +func (js *JournalSuperblock) HasChecksums() bool { + return js.compatFeatures&jbd2CompatFeatureChecksum != 0 || + js.incompatFeatures&jbd2IncompatFeatureChecksumV2 != 0 || + js.incompatFeatures&jbd2IncompatFeatureChecksumV3 != 0 +} + +// 
Uses64BitBlockNumbers returns true if 64-bit block numbers are supported +func (js *JournalSuperblock) Uses64BitBlockNumbers() bool { + return js.incompatFeatures&jbd2IncompatFeature64Bit != 0 +} + +// journalDescriptorBlock methods + +// journalDescriptorBlockFromBytes creates a journalDescriptorBlock from bytes +func journalDescriptorBlockFromBytes(b []byte, superblock *JournalSuperblock) (*journalDescriptorBlock, error) { + if len(b) < 12 { + return nil, fmt.Errorf("cannot read descriptor block from %d bytes, need at least 12", len(b)) + } + + header, err := journalHeaderFromBytes(b[0x0:0xc]) + if err != nil { + return nil, fmt.Errorf("invalid descriptor block header: %v", err) + } + + if header.blockType != journalBlockTypeDescriptor { + return nil, fmt.Errorf("expected descriptor block type (1), got %d", header.blockType) + } + + dblock := &journalDescriptorBlock{ + header: header, + tags: make([]*journalBlockTag, 0), + } + + // Parse block tags + offset := 12 + for offset < len(b) { + tag, err := parseBlockTag(b[offset:], superblock) + if err != nil { + break // End of tags + } + dblock.tags = append(dblock.tags, tag) + + // Check if this is the last tag + if tag.flags&uint32(tagFlagLast) != 0 { + break + } + + // Move to next tag + tagSize := getBlockTagSize(superblock, tag) + offset += tagSize + } + + // Parse block tail if checksums are enabled + if superblock != nil && (superblock.incompatFeatures&jbd2IncompatFeatureChecksumV2 != 0 || + superblock.incompatFeatures&jbd2IncompatFeatureChecksumV3 != 0) { + if len(b) >= 4 { + tail := &journalBlockTail{ + checksum: binary.BigEndian.Uint32(b[len(b)-4:]), + } + dblock.tail = tail + } + } + + return dblock, nil +} + +// parseBlockTag parses a single block tag from bytes +func parseBlockTag(b []byte, superblock *JournalSuperblock) (*journalBlockTag, error) { + if len(b) < 16 { + return nil, fmt.Errorf("not enough bytes for block tag") + } + + tag := &journalBlockTag{} + + // Always present: blockNr (lower), flags 
(upper) + blockNrLower := binary.BigEndian.Uint32(b[0x0:0x4]) + tag.flags = binary.BigEndian.Uint32(b[0x4:0x8]) + tag.blockNr = uint64(blockNrLower) + + // If 64-bit support + offset := 8 + if superblock != nil && superblock.Uses64BitBlockNumbers() { + if len(b) < offset+4 { + return nil, fmt.Errorf("not enough bytes for 64-bit block tag") + } + blockNrHigh := binary.BigEndian.Uint32(b[offset : offset+4]) + tag.blockNr |= uint64(blockNrHigh) << 32 + offset += 4 + } + + // Checksum + if len(b) >= offset+4 { + tag.checksum = binary.BigEndian.Uint32(b[offset : offset+4]) + offset += 4 + } + + // UUID (if not same as previous) + if tag.flags&uint32(tagFlagSameUUID) == 0 { + if len(b) >= offset+16 { + tag.uuid = make([]byte, 16) + copy(tag.uuid, b[offset:offset+16]) + } + } + + return tag, nil +} + +// getBlockTagSize returns the size of a block tag in bytes +func getBlockTagSize(superblock *JournalSuperblock, tag *journalBlockTag) int { + size := 8 // Base: blockNr (4) + flags (4) + + if superblock != nil && superblock.Uses64BitBlockNumbers() { + size += 4 // blockNrHigh + } + + size += 4 // checksum + + if tag.flags&uint32(tagFlagSameUUID) == 0 { + size += 16 // UUID + } + + return size +} + +// ToBytes converts journalDescriptorBlock to bytes +func (dblock *journalDescriptorBlock) ToBytes(superblock *JournalSuperblock, blockSize uint32) ([]byte, error) { + b := make([]byte, blockSize) + + // Write header + if dblock.header == nil { + dblock.header = &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeDescriptor, + sequence: 0, + } + } + headerBytes := dblock.header.toBytes() + copy(b[0x0:0xc], headerBytes) + + // Write tags + offset := 12 + for i, tag := range dblock.tags { + tagBytes := tag.toBytes(i == len(dblock.tags)-1, superblock) + if offset+len(tagBytes) > len(b)-4 { // Reserve 4 bytes for tail if needed + break + } + copy(b[offset:], tagBytes) + offset += len(tagBytes) + } + + // Write block tail if checksums are enabled + if dblock.tail != 
nil { + if offset+4 <= len(b) { + binary.BigEndian.PutUint32(b[len(b)-4:], dblock.tail.checksum) + } + } + + return b, nil +} + +// toBytes converts a journalBlockTag to bytes +func (tag *journalBlockTag) toBytes(isLast bool, superblock *JournalSuperblock) []byte { + size := getBlockTagSize(superblock, tag) + b := make([]byte, size) + + // Write lower 32 bits of block number + binary.BigEndian.PutUint32(b[0x0:0x4], uint32(tag.blockNr&0xffffffff)) + + // Write flags + flags := tag.flags + if isLast { + flags |= uint32(tagFlagLast) + } + binary.BigEndian.PutUint32(b[0x4:0x8], flags) + + // Write upper 32 bits if 64-bit + offset := 8 + if superblock != nil && superblock.Uses64BitBlockNumbers() { + binary.BigEndian.PutUint32(b[offset:offset+4], uint32((tag.blockNr>>32)&0xffffffff)) + offset += 4 + } + + // Write checksum + binary.BigEndian.PutUint32(b[offset:offset+4], tag.checksum) + offset += 4 + + // Write UUID if present + if tag.flags&uint32(tagFlagSameUUID) == 0 && tag.uuid != nil { + copy(b[offset:offset+16], tag.uuid) + } + + return b +} + +// journalCommitBlock methods + +// journalCommitBlockFromBytes creates a journalCommitBlock from bytes +func journalCommitBlockFromBytes(b []byte) (*journalCommitBlock, error) { + if len(b) < 32 { + return nil, fmt.Errorf("cannot read commit block from %d bytes, need at least 32", len(b)) + } + + header, err := journalHeaderFromBytes(b[0x0:0xc]) + if err != nil { + return nil, fmt.Errorf("invalid commit block header: %v", err) + } + + if header.blockType != journalBlockTypeCommit { + return nil, fmt.Errorf("expected commit block type (2), got %d", header.blockType) + } + + cblock := &journalCommitBlock{ + header: header, + checksumType: b[0xc], + checksumSize: b[0xd], + commitSec: binary.BigEndian.Uint64(b[0x30:0x38]), + commitNsec: binary.BigEndian.Uint32(b[0x38:0x3c]), + } + + // Copy checksums array + copy(cblock.checksums[:], b[0x10:0x30]) + + return cblock, nil +} + +// ToBytes converts journalCommitBlock to bytes 
+func (cblock *journalCommitBlock) ToBytes(blockSize uint32) ([]byte, error) { + b := make([]byte, blockSize) + + // Write header + if cblock.header == nil { + cblock.header = &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeCommit, + sequence: 0, + } + } + headerBytes := cblock.header.toBytes() + copy(b[0x0:0xc], headerBytes) + + b[0xc] = cblock.checksumType + b[0xd] = cblock.checksumSize + // 2 bytes padding at 0xe:0x10 + + // Write checksums + copy(b[0x10:0x30], cblock.checksums[:]) + + // Write timestamp + binary.BigEndian.PutUint64(b[0x30:0x38], cblock.commitSec) + binary.BigEndian.PutUint32(b[0x38:0x3c], cblock.commitNsec) + + return b, nil +} + +// SetCommitTime sets the commit block timestamp to the current time +func (cblock *journalCommitBlock) SetCommitTime(t time.Time) { + cblock.commitSec = uint64(t.Unix()) + cblock.commitNsec = uint32(t.Nanosecond()) +} + +// journalRevokeBlock methods + +// journalRevokeBlockFromBytes creates a journalRevokeBlock from bytes +func journalRevokeBlockFromBytes(b []byte, superblock *JournalSuperblock) (*journalRevokeBlock, error) { + if len(b) < 16 { + return nil, fmt.Errorf("cannot read revoke block from %d bytes, need at least 16", len(b)) + } + + header, err := journalHeaderFromBytes(b[0x0:0xc]) + if err != nil { + return nil, fmt.Errorf("invalid revoke block header: %v", err) + } + + if header.blockType != journalBlockTypeRevoke { + return nil, fmt.Errorf("expected revoke block type (5), got %d", header.blockType) + } + + rblock := &journalRevokeBlock{ + header: header, + count: binary.BigEndian.Uint32(b[0xc:0x10]), + blocks: make([]uint64, 0), + } + + // Parse block numbers + offset := 16 + blockSize := uint32(4) + if superblock != nil && superblock.Uses64BitBlockNumbers() { + blockSize = 8 + } + + numBlocks := (rblock.count - 16) / blockSize + for i := uint32(0); i < numBlocks && offset < len(b); i++ { + if blockSize == 8 { + if offset+8 <= len(b) { + rblock.blocks = append(rblock.blocks, 
binary.BigEndian.Uint64(b[offset:offset+8])) + offset += 8 + } + } else { + if offset+4 <= len(b) { + rblock.blocks = append(rblock.blocks, uint64(binary.BigEndian.Uint32(b[offset:offset+4]))) + offset += 4 + } + } + } + + // Parse block tail if checksums are enabled + if superblock != nil && (superblock.incompatFeatures&jbd2IncompatFeatureChecksumV2 != 0 || + superblock.incompatFeatures&jbd2IncompatFeatureChecksumV3 != 0) { + if len(b) >= 4 { + tail := &journalBlockTail{ + checksum: binary.BigEndian.Uint32(b[len(b)-4:]), + } + rblock.tail = tail + } + } + + return rblock, nil +} + +// ToBytes converts journalRevokeBlock to bytes +func (rblock *journalRevokeBlock) ToBytes(superblock *JournalSuperblock, blockSize uint32) ([]byte, error) { + b := make([]byte, blockSize) + + // Write header + if rblock.header == nil { + rblock.header = &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeRevoke, + sequence: 0, + } + } + headerBytes := rblock.header.toBytes() + copy(b[0x0:0xc], headerBytes) + + // Calculate count + blockNumSize := uint32(4) + if superblock != nil && superblock.Uses64BitBlockNumbers() { + blockNumSize = 8 + } + count := 16 + uint32(len(rblock.blocks))*blockNumSize + binary.BigEndian.PutUint32(b[0xc:0x10], count) + + // Write block numbers + offset := 16 + for _, blockNum := range rblock.blocks { + if blockNumSize == 8 { + if offset+8 > len(b)-4 { // Reserve space for tail + break + } + binary.BigEndian.PutUint64(b[offset:offset+8], blockNum) + offset += 8 + } else { + if offset+4 > len(b)-4 { // Reserve space for tail + break + } + binary.BigEndian.PutUint32(b[offset:offset+4], uint32(blockNum&0xffffffff)) + offset += 4 + } + } + + // Write block tail if checksums are enabled + if rblock.tail != nil && offset+4 <= len(b) { + binary.BigEndian.PutUint32(b[len(b)-4:], rblock.tail.checksum) + } + + return b, nil +} + +// AddBlock adds a block number to the revoke list +func (rblock *journalRevokeBlock) AddBlock(blockNum uint64) { + 
rblock.blocks = append(rblock.blocks, blockNum) +} + +// Helper function to create a new empty journal superblock +// NewJournalSuperblock creates a new empty journal superblock with default values +// blockSize is the filesystem block size in bytes +// journalBlocks is the total number of blocks in the journal +func NewJournalSuperblock(blockSize, journalBlocks uint32) *JournalSuperblock { + newUUID, _ := uuid.NewRandom() + return &JournalSuperblock{ + header: &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeSuperblockV2, + sequence: 0, + }, + blockSize: blockSize, + maxLen: journalBlocks, + first: 1, + sequence: 1, + start: 0, + errno: 0, + compatFeatures: 0, + incompatFeatures: 0, + roCompatFeatures: 0, + uuid: &newUUID, + nrUsers: 1, + dynsuper: 0, + maxTransaction: 32768, // Default value + maxTransData: 32768, // Default value + checksumType: checksumTypeCRC32C, + maxFCBlocks: 0, + head: 0, + checksum: 0, + } +} + +// newJournalDescriptorBlock creates a new descriptor block with the given sequence number +func newJournalDescriptorBlock(sequence uint32) *journalDescriptorBlock { + return &journalDescriptorBlock{ + header: &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeDescriptor, + sequence: sequence, + }, + tags: make([]*journalBlockTag, 0), + } +} + +// newJournalCommitBlock creates a new commit block with the given sequence number +func newJournalCommitBlock(sequence uint32) *journalCommitBlock { + return &journalCommitBlock{ + header: &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeCommit, + sequence: sequence, + }, + checksumType: checksumTypeCRC32C, + checksumSize: 4, + } +} + +// newJournalRevokeBlock creates a new revoke block with the given sequence number +func newJournalRevokeBlock(sequence uint32) *journalRevokeBlock { + return &journalRevokeBlock{ + header: &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeRevoke, + sequence: sequence, + }, + blocks: make([]uint64, 0), + 
} +} diff --git a/filesystem/ext4/journal_test.go b/filesystem/ext4/journal_test.go new file mode 100644 index 0000000..117aaaf --- /dev/null +++ b/filesystem/ext4/journal_test.go @@ -0,0 +1,753 @@ +package ext4 + +import ( + "encoding/binary" + "testing" + "time" + + "github.com/google/uuid" +) + +// TestJournalHeaderFromBytes tests parsing a journal header from bytes +func TestJournalHeaderFromBytes(t *testing.T) { + tests := []struct { + name string + input []byte + wantErr bool + check func(*journalHeader) + }{ + { + name: "valid header with descriptor block", + input: func() []byte { + b := make([]byte, 12) + binary.BigEndian.PutUint32(b[0x0:0x4], journalMagic) + binary.BigEndian.PutUint32(b[0x4:0x8], uint32(journalBlockTypeDescriptor)) + binary.BigEndian.PutUint32(b[0x8:0xc], 42) + return b + }(), + wantErr: false, + check: func(jh *journalHeader) { + if jh.magic != journalMagic { + t.Errorf("magic = %x, want %x", jh.magic, journalMagic) + } + if jh.blockType != journalBlockTypeDescriptor { + t.Errorf("blockType = %d, want %d", jh.blockType, journalBlockTypeDescriptor) + } + if jh.sequence != 42 { + t.Errorf("sequence = %d, want 42", jh.sequence) + } + }, + }, + { + name: "valid header with commit block", + input: func() []byte { + b := make([]byte, 12) + binary.BigEndian.PutUint32(b[0x0:0x4], journalMagic) + binary.BigEndian.PutUint32(b[0x4:0x8], uint32(journalBlockTypeCommit)) + binary.BigEndian.PutUint32(b[0x8:0xc], 100) + return b + }(), + wantErr: false, + check: func(jh *journalHeader) { + if jh.blockType != journalBlockTypeCommit { + t.Errorf("blockType = %d, want %d", jh.blockType, journalBlockTypeCommit) + } + }, + }, + { + name: "invalid magic number", + input: make([]byte, 12), + wantErr: true, + }, + { + name: "insufficient bytes", + input: make([]byte, 11), + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + jh, err := journalHeaderFromBytes(tt.input) + if (err != nil) != tt.wantErr { + 
t.Errorf("journalHeaderFromBytes() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr && tt.check != nil { + tt.check(jh) + } + }) + } +} + +// TestJournalHeaderToBytes tests serializing a journal header to bytes +func TestJournalHeaderToBytes(t *testing.T) { + jh := &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeDescriptor, + sequence: 123, + } + + b := jh.toBytes() + + if len(b) != 12 { + t.Errorf("toBytes() returned %d bytes, want 12", len(b)) + } + + if magic := binary.BigEndian.Uint32(b[0x0:0x4]); magic != journalMagic { + t.Errorf("magic = %x, want %x", magic, journalMagic) + } + + if blockType := binary.BigEndian.Uint32(b[0x4:0x8]); blockType != uint32(journalBlockTypeDescriptor) { + t.Errorf("blockType = %d, want %d", blockType, journalBlockTypeDescriptor) + } + + if sequence := binary.BigEndian.Uint32(b[0x8:0xc]); sequence != 123 { + t.Errorf("sequence = %d, want 123", sequence) + } +} + +// TestJournalHeaderRoundTrip tests that header can be serialized and deserialized +func TestJournalHeaderRoundTrip(t *testing.T) { + original := &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeSuperblockV2, + sequence: 456, + } + + b := original.toBytes() + restored, err := journalHeaderFromBytes(b) + + if err != nil { + t.Fatalf("journalHeaderFromBytes() error = %v", err) + } + + if restored.magic != original.magic { + t.Errorf("magic mismatch: %x != %x", restored.magic, original.magic) + } + if restored.blockType != original.blockType { + t.Errorf("blockType mismatch: %d != %d", restored.blockType, original.blockType) + } + if restored.sequence != original.sequence { + t.Errorf("sequence mismatch: %d != %d", restored.sequence, original.sequence) + } +} + +// TestJournalSuperblockFromBytes tests parsing a journal superblock +func TestJournalSuperblockFromBytes(t *testing.T) { + // Create valid superblock bytes + b := make([]byte, JournalSuperblockSize) + + // Header + binary.BigEndian.PutUint32(b[0x0:0x4], 
journalMagic) + binary.BigEndian.PutUint32(b[0x4:0x8], uint32(journalBlockTypeSuperblockV2)) + binary.BigEndian.PutUint32(b[0x8:0xc], 0) + + // Basic fields + binary.BigEndian.PutUint32(b[0xc:0x10], 4096) // blockSize + binary.BigEndian.PutUint32(b[0x10:0x14], 1000) // maxLen + binary.BigEndian.PutUint32(b[0x14:0x18], 1) // first + binary.BigEndian.PutUint32(b[0x18:0x1c], 1) // sequence + binary.BigEndian.PutUint32(b[0x1c:0x20], 0) // start + binary.BigEndian.PutUint32(b[0x20:0x24], 0) // errno + + // V2 fields + binary.BigEndian.PutUint32(b[0x24:0x28], jbd2CompatFeatureChecksum) // compatFeatures + binary.BigEndian.PutUint32(b[0x28:0x2c], jbd2IncompatFeature64Bit) // incompatFeatures + binary.BigEndian.PutUint32(b[0x2c:0x30], 0) // roCompatFeatures + binary.BigEndian.PutUint32(b[0x40:0x44], 1) // nrUsers + binary.BigEndian.PutUint32(b[0x48:0x4c], 32768) // maxTransaction + binary.BigEndian.PutUint32(b[0x4c:0x50], 32768) // maxTransData + b[0x50] = checksumTypeCRC32C + + // UUID + testUUID, _ := uuid.NewRandom() + copy(b[0x30:0x40], testUUID[:]) + + js, err := JournalSuperblockFromBytes(b) + + if err != nil { + t.Fatalf("JournalSuperblockFromBytes() error = %v", err) + } + + if js.blockSize != 4096 { + t.Errorf("blockSize = %d, want 4096", js.blockSize) + } + + if js.maxLen != 1000 { + t.Errorf("maxLen = %d, want 1000", js.maxLen) + } + + if js.first != 1 { + t.Errorf("first = %d, want 1", js.first) + } + + if js.sequence != 1 { + t.Errorf("sequence = %d, want 1", js.sequence) + } + + if js.nrUsers != 1 { + t.Errorf("nrUsers = %d, want 1", js.nrUsers) + } + + if !js.Uses64BitBlockNumbers() { + t.Error("Uses64BitBlockNumbers() = false, want true") + } + + if js.uuid.String() != testUUID.String() { + t.Errorf("uuid mismatch: %s != %s", js.uuid.String(), testUUID.String()) + } +} + +// TestJournalSuperblockToBytes tests serializing a journal superblock +func TestJournalSuperblockToBytes(t *testing.T) { + testUUID, _ := uuid.NewRandom() + js := &JournalSuperblock{ + 
header: &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeSuperblockV2, + sequence: 0, + }, + blockSize: 4096, + maxLen: 1000, + first: 1, + sequence: 1, + start: 0, + errno: 0, + compatFeatures: jbd2CompatFeatureChecksum, + incompatFeatures: jbd2IncompatFeature64Bit, + roCompatFeatures: 0, + uuid: &testUUID, + nrUsers: 1, + dynsuper: 0, + maxTransaction: 32768, + maxTransData: 32768, + checksumType: checksumTypeCRC32C, + maxFCBlocks: 0, + head: 0, + checksum: 0, + } + + b, err := js.ToBytes() + + if err != nil { + t.Fatalf("ToBytes() error = %v", err) + } + + if len(b) != JournalSuperblockSize { + t.Errorf("ToBytes() returned %d bytes, want %d", len(b), JournalSuperblockSize) + } + + // Verify magic + if magic := binary.BigEndian.Uint32(b[0x0:0x4]); magic != journalMagic { + t.Errorf("magic = %x, want %x", magic, journalMagic) + } + + // Verify blockSize + if blockSize := binary.BigEndian.Uint32(b[0xc:0x10]); blockSize != 4096 { + t.Errorf("blockSize = %d, want 4096", blockSize) + } + + // Verify maxLen + if maxLen := binary.BigEndian.Uint32(b[0x10:0x14]); maxLen != 1000 { + t.Errorf("maxLen = %d, want 1000", maxLen) + } +} + +// TestJournalSuperblockRoundTrip tests serialization and deserialization +func TestJournalSuperblockRoundTrip(t *testing.T) { + testUUID, _ := uuid.NewRandom() + original := &JournalSuperblock{ + header: &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeSuperblockV2, + sequence: 0, + }, + blockSize: 4096, + maxLen: 1000, + first: 1, + sequence: 1, + start: 0, + errno: 0, + compatFeatures: jbd2CompatFeatureChecksum, + incompatFeatures: jbd2IncompatFeature64Bit, + roCompatFeatures: 0, + uuid: &testUUID, + nrUsers: 1, + dynsuper: 0, + maxTransaction: 32768, + maxTransData: 32768, + checksumType: checksumTypeCRC32C, + maxFCBlocks: 0, + head: 0, + checksum: 0, + } + + b, err := original.ToBytes() + if err != nil { + t.Fatalf("ToBytes() error = %v", err) + } + + restored, err := JournalSuperblockFromBytes(b) + 
if err != nil { + t.Fatalf("JournalSuperblockFromBytes() error = %v", err) + } + + if restored.blockSize != original.blockSize { + t.Errorf("blockSize: %d != %d", restored.blockSize, original.blockSize) + } + if restored.maxLen != original.maxLen { + t.Errorf("maxLen: %d != %d", restored.maxLen, original.maxLen) + } + if restored.first != original.first { + t.Errorf("first: %d != %d", restored.first, original.first) + } + if restored.sequence != original.sequence { + t.Errorf("sequence: %d != %d", restored.sequence, original.sequence) + } + if restored.uuid.String() != original.uuid.String() { + t.Errorf("uuid: %s != %s", restored.uuid.String(), original.uuid.String()) + } +} + +// TestJournalSuperblockFeatureFlags tests feature flag methods +func TestJournalSuperblockFeatureFlags(t *testing.T) { + testUUID, _ := uuid.NewRandom() + js := &JournalSuperblock{ + header: &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeSuperblockV2, + sequence: 0, + }, + blockSize: 4096, + maxLen: 1000, + first: 1, + sequence: 1, + start: 0, + errno: 0, + compatFeatures: jbd2CompatFeatureChecksum, + incompatFeatures: jbd2IncompatFeature64Bit | jbd2IncompatFeatureChecksumV3, + roCompatFeatures: 0, + uuid: &testUUID, + nrUsers: 1, + checksumType: checksumTypeCRC32C, + } + + tests := []struct { + name string + method func() bool + expected bool + }{ + { + name: "HasChecksums", + method: js.HasChecksums, + expected: true, + }, + { + name: "Uses64BitBlockNumbers", + method: js.Uses64BitBlockNumbers, + expected: true, + }, + { + name: "SupportsCompatFeature checksum", + method: func() bool { + return js.SupportsCompatFeature(jbd2CompatFeatureChecksum) + }, + expected: true, + }, + { + name: "SupportsFeature 64bit", + method: func() bool { + return js.SupportsFeature(jbd2IncompatFeature64Bit) + }, + expected: true, + }, + { + name: "SupportsFeature revoke (not set)", + method: func() bool { + return js.SupportsFeature(jbd2IncompatFeatureRevoke) + }, + expected: false, + }, 
+ } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.method() + if result != tt.expected { + t.Errorf("%s() = %v, want %v", tt.name, result, tt.expected) + } + }) + } +} + +// TestNewJournalSuperblock tests creating a new journal superblock +func TestNewJournalSuperblock(t *testing.T) { + js := NewJournalSuperblock(4096, 1000) + + if js.blockSize != 4096 { + t.Errorf("blockSize = %d, want 4096", js.blockSize) + } + + if js.maxLen != 1000 { + t.Errorf("maxLen = %d, want 1000", js.maxLen) + } + + if js.first != 1 { + t.Errorf("first = %d, want 1", js.first) + } + + if js.sequence != 1 { + t.Errorf("sequence = %d, want 1", js.sequence) + } + + if js.uuid == nil { + t.Error("uuid is nil, want valid UUID") + } + + // By default, journal should have no features enabled (matching mke2fs behavior) + if js.HasChecksums() { + t.Error("HasChecksums() = true, want false (default)") + } + + if js.Uses64BitBlockNumbers() { + t.Error("Uses64BitBlockNumbers() = true, want false (default)") + } +} + +// TestJournalCommitBlock tests commit block operations +func TestJournalCommitBlock(t *testing.T) { + cb := newJournalCommitBlock(42) + + if cb.header.blockType != journalBlockTypeCommit { + t.Errorf("blockType = %d, want %d", cb.header.blockType, journalBlockTypeCommit) + } + + if cb.header.sequence != 42 { + t.Errorf("sequence = %d, want 42", cb.header.sequence) + } + + testTime := time.Unix(1609459200, 123456789) // 2021-01-01 00:00:00 UTC + cb.SetCommitTime(testTime) + + if cb.commitSec != uint64(testTime.Unix()) { + t.Errorf("commitSec = %d, want %d", cb.commitSec, uint64(testTime.Unix())) + } + + if cb.commitNsec != uint32(testTime.Nanosecond()) { + t.Errorf("commitNsec = %d, want %d", cb.commitNsec, uint32(testTime.Nanosecond())) + } + + // Test serialization + b, err := cb.ToBytes(4096) + if err != nil { + t.Fatalf("ToBytes() error = %v", err) + } + + if len(b) != 4096 { + t.Errorf("ToBytes() returned %d bytes, want 4096", len(b)) + } + + // 
Verify magic in serialized form + if magic := binary.BigEndian.Uint32(b[0x0:0x4]); magic != journalMagic { + t.Errorf("serialized magic = %x, want %x", magic, journalMagic) + } +} + +// TestJournalCommitBlockRoundTrip tests commit block serialization +func TestJournalCommitBlockRoundTrip(t *testing.T) { + original := newJournalCommitBlock(123) + testTime := time.Unix(1609459200, 987654321) + original.SetCommitTime(testTime) + + b, err := original.ToBytes(4096) + if err != nil { + t.Fatalf("ToBytes() error = %v", err) + } + + restored, err := journalCommitBlockFromBytes(b) + if err != nil { + t.Fatalf("journalCommitBlockFromBytes() error = %v", err) + } + + if restored.commitSec != original.commitSec { + t.Errorf("commitSec: %d != %d", restored.commitSec, original.commitSec) + } + + if restored.commitNsec != original.commitNsec { + t.Errorf("commitNsec: %d != %d", restored.commitNsec, original.commitNsec) + } +} + +// TestJournalRevokeBlock tests revocation block operations +func TestJournalRevokeBlock(t *testing.T) { + rb := newJournalRevokeBlock(50) + + if rb.header.blockType != journalBlockTypeRevoke { + t.Errorf("blockType = %d, want %d", rb.header.blockType, journalBlockTypeRevoke) + } + + if rb.header.sequence != 50 { + t.Errorf("sequence = %d, want 50", rb.header.sequence) + } + + // Add some blocks + rb.AddBlock(100) + rb.AddBlock(200) + rb.AddBlock(300) + + if len(rb.blocks) != 3 { + t.Errorf("len(blocks) = %d, want 3", len(rb.blocks)) + } + + if rb.blocks[0] != 100 { + t.Errorf("blocks[0] = %d, want 100", rb.blocks[0]) + } + + if rb.blocks[1] != 200 { + t.Errorf("blocks[1] = %d, want 200", rb.blocks[1]) + } + + if rb.blocks[2] != 300 { + t.Errorf("blocks[2] = %d, want 300", rb.blocks[2]) + } +} + +// TestJournalRevokeBlockSerialization tests revocation block serialization +func TestJournalRevokeBlockSerialization(t *testing.T) { + testUUID, _ := uuid.NewRandom() + superblock := &JournalSuperblock{ + header: &journalHeader{ + magic: journalMagic, + 
blockType: journalBlockTypeSuperblockV2, + sequence: 0, + }, + blockSize: 4096, + maxLen: 1000, + first: 1, + sequence: 1, + incompatFeatures: jbd2IncompatFeature64Bit, + uuid: &testUUID, + } + + rb := newJournalRevokeBlock(25) + rb.AddBlock(100) + rb.AddBlock(200) + + b, err := rb.ToBytes(superblock, 4096) + if err != nil { + t.Fatalf("ToBytes() error = %v", err) + } + + if len(b) != 4096 { + t.Errorf("ToBytes() returned %d bytes, want 4096", len(b)) + } + + // Verify header + if magic := binary.BigEndian.Uint32(b[0x0:0x4]); magic != journalMagic { + t.Errorf("magic = %x, want %x", magic, journalMagic) + } + + if blockType := binary.BigEndian.Uint32(b[0x4:0x8]); blockType != uint32(journalBlockTypeRevoke) { + t.Errorf("blockType = %d, want %d", blockType, journalBlockTypeRevoke) + } + + // Verify count field + count := binary.BigEndian.Uint32(b[0xc:0x10]) + if count < 16 { + t.Errorf("count = %d, want >= 16", count) + } +} + +// TestJournalDescriptorBlock tests descriptor block operations +func TestJournalDescriptorBlock(t *testing.T) { + db := newJournalDescriptorBlock(75) + + if db.header.blockType != journalBlockTypeDescriptor { + t.Errorf("blockType = %d, want %d", db.header.blockType, journalBlockTypeDescriptor) + } + + if db.header.sequence != 75 { + t.Errorf("sequence = %d, want 75", db.header.sequence) + } + + if len(db.tags) != 0 { + t.Errorf("initial tags length = %d, want 0", len(db.tags)) + } +} + +// TestJournalDescriptorBlockSerialization tests descriptor block serialization +func TestJournalDescriptorBlockSerialization(t *testing.T) { + testUUID, _ := uuid.NewRandom() + superblock := &JournalSuperblock{ + header: &journalHeader{ + magic: journalMagic, + blockType: journalBlockTypeSuperblockV2, + sequence: 0, + }, + blockSize: 4096, + maxLen: 1000, + first: 1, + sequence: 1, + incompatFeatures: jbd2IncompatFeature64Bit, + uuid: &testUUID, + } + + db := newJournalDescriptorBlock(100) + + // Create some block tags + tag1 := &journalBlockTag{ + blockNr: 
1000, + flags: 0, + checksum: 0, + } + + tag2 := &journalBlockTag{ + blockNr: 2000, + flags: uint32(tagFlagLast), // Last tag + checksum: 0, + } + + db.tags = append(db.tags, tag1, tag2) + + b, err := db.ToBytes(superblock, 4096) + if err != nil { + t.Fatalf("ToBytes() error = %v", err) + } + + if len(b) != 4096 { + t.Errorf("ToBytes() returned %d bytes, want 4096", len(b)) + } + + // Verify magic + if magic := binary.BigEndian.Uint32(b[0x0:0x4]); magic != journalMagic { + t.Errorf("magic = %x, want %x", magic, journalMagic) + } + + // Verify block type + if blockType := binary.BigEndian.Uint32(b[0x4:0x8]); blockType != uint32(journalBlockTypeDescriptor) { + t.Errorf("blockType = %d, want %d", blockType, journalBlockTypeDescriptor) + } +} + +// TestBlockTagSerialization tests block tag serialization with 64-bit support +func TestBlockTagSerialization(t *testing.T) { + testUUID, _ := uuid.NewRandom() + superblock := &JournalSuperblock{ + incompatFeatures: jbd2IncompatFeature64Bit, + uuid: &testUUID, + } + + tag := &journalBlockTag{ + blockNr: 0x123456789ABCDEF0, + flags: uint32(tagFlagSameUUID), // Same UUID, so no UUID field + checksum: 0xDEADBEEF, + } + + b := tag.toBytes(false, superblock) + + // Should be: 4 bytes lower blockNr + 4 bytes flags + 4 bytes upper blockNr + 4 bytes checksum = 16 bytes + expectedSize := 16 + if len(b) != expectedSize { + t.Errorf("toBytes() returned %d bytes, want %d", len(b), expectedSize) + } + + // Verify lower block number + lower := binary.BigEndian.Uint32(b[0x0:0x4]) + if uint64(lower) != tag.blockNr&0xffffffff { + t.Errorf("lower blockNr = %x, want %x", lower, uint32(tag.blockNr&0xffffffff)) + } + + // Verify upper block number + upper := binary.BigEndian.Uint32(b[0x8:0xc]) + if uint64(upper) != tag.blockNr>>32 { + t.Errorf("upper blockNr = %x, want %x", upper, uint32(tag.blockNr>>32)) + } + + // Verify checksum + checksum := binary.BigEndian.Uint32(b[0xc:0x10]) + if checksum != 0xDEADBEEF { + t.Errorf("checksum = %x, want %x", 
checksum, 0xDEADBEEF) + } +} + +// TestBlockTagLastFlag tests last flag handling in block tags +func TestBlockTagLastFlag(t *testing.T) { + testUUID, _ := uuid.NewRandom() + superblock := &JournalSuperblock{ + incompatFeatures: jbd2IncompatFeature64Bit, + uuid: &testUUID, + } + + tag := &journalBlockTag{ + blockNr: 1000, + flags: 0, + checksum: 0, + } + + // Serialize with isLast=true + b := tag.toBytes(true, superblock) + + // Verify flags field has last flag set + flags := binary.BigEndian.Uint32(b[0x4:0x8]) + if flags&uint32(tagFlagLast) == 0 { + t.Error("last flag not set in serialized tag") + } +} + +// TestGetBlockTagSize tests the block tag size calculation +func TestGetBlockTagSize(t *testing.T) { + testUUID, _ := uuid.NewRandom() + + tests := []struct { + name string + superblock *JournalSuperblock + tag *journalBlockTag + expectedMin int + }{ + { + name: "without 64-bit, with UUID", + superblock: &JournalSuperblock{ + incompatFeatures: 0, + uuid: &testUUID, + }, + tag: &journalBlockTag{ + flags: 0, // UUID included + uuid: make([]byte, 16), + }, + expectedMin: 24, // 4 + 4 + 16 + }, + { + name: "with 64-bit, with UUID", + superblock: &JournalSuperblock{ + incompatFeatures: jbd2IncompatFeature64Bit, + uuid: &testUUID, + }, + tag: &journalBlockTag{ + flags: 0, // UUID included + uuid: make([]byte, 16), + }, + expectedMin: 28, // 4 + 4 + 4 + 16 + }, + { + name: "with 64-bit, same UUID", + superblock: &JournalSuperblock{ + incompatFeatures: jbd2IncompatFeature64Bit, + uuid: &testUUID, + }, + tag: &journalBlockTag{ + flags: uint32(tagFlagSameUUID), // UUID not included + }, + expectedMin: 12, // 4 + 4 + 4 + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + size := getBlockTagSize(tt.superblock, tt.tag) + if size < tt.expectedMin { + t.Errorf("getBlockTagSize() = %d, want >= %d", size, tt.expectedMin) + } + }) + } +} diff --git a/filesystem/ext4/superblock.go b/filesystem/ext4/superblock.go index 0b68c86..3591f39 100644 ---
a/filesystem/ext4/superblock.go +++ b/filesystem/ext4/superblock.go @@ -4,6 +4,7 @@ import ( "encoding/binary" "fmt" "math" + "math/bits" "reflect" "sort" "time" @@ -489,8 +490,33 @@ func (sb *superblock) toBytes() ([]byte, error) { binary.LittleEndian.PutUint32(b[0x10:0x14], sb.freeInodes) binary.LittleEndian.PutUint32(b[0x14:0x18], sb.firstDataBlock) - binary.LittleEndian.PutUint32(b[0x18:0x1c], uint32(math.Log2(float64(sb.blockSize))-10)) - binary.LittleEndian.PutUint32(b[0x1c:0x20], uint32(math.Log2(float64(sb.clusterSize)))) + // blockSize must be power-of-two and >= 1024 + logBlockSize := uint32(bits.TrailingZeros32(sb.blockSize)) - 10 + binary.LittleEndian.PutUint32(b[0x18:0x1c], logBlockSize) + + if sb.blockSize < 1024 || sb.blockSize&(sb.blockSize-1) != 0 { + return nil, fmt.Errorf("invalid blockSize %d", sb.blockSize) + } + if sb.clusterSize <= 0 { + return nil, fmt.Errorf("invalid clusterSize %d", sb.clusterSize) + } + + // s_log_cluster_size = log2(clusterSize / blockSize) (or 0 if !bigalloc) + var logCluster uint32 + blockSize := uint64(sb.blockSize) + if sb.features.bigalloc { + if sb.clusterSize%blockSize != 0 { + return nil, fmt.Errorf("clusterSize %d not multiple of blockSize %d", sb.clusterSize, sb.blockSize) + } + ratio := sb.clusterSize / blockSize + if ratio == 0 || ratio&(ratio-1) != 0 { + return nil, fmt.Errorf("clusterSize/blockSize ratio must be power of two, got %d", ratio) + } + logCluster = uint32(bits.TrailingZeros32(uint32(ratio))) + } else { + logCluster = 0 + } + binary.LittleEndian.PutUint32(b[0x1c:0x20], logCluster) binary.LittleEndian.PutUint32(b[0x20:0x24], sb.blocksPerGroup) if sb.features.bigalloc { @@ -578,13 +604,12 @@ func (sb *superblock) toBytes() ([]byte, error) { binary.LittleEndian.PutUint32(b[0xe8:0xec], sb.orphanedInodesStart) // to be safe - if len(sb.hashTreeSeed) < 4 { - sb.hashTreeSeed = append(sb.hashTreeSeed, 0, 0, 0, 0) - } - binary.LittleEndian.PutUint32(b[0xec:0xf0], sb.hashTreeSeed[0]) - 
binary.LittleEndian.PutUint32(b[0xf0:0xf4], sb.hashTreeSeed[1]) - binary.LittleEndian.PutUint32(b[0xf4:0xf8], sb.hashTreeSeed[2]) - binary.LittleEndian.PutUint32(b[0xf8:0xfc], sb.hashTreeSeed[3]) + hashTreeSeed := make([]uint32, 4) + copy(hashTreeSeed, sb.hashTreeSeed) + binary.LittleEndian.PutUint32(b[0xec:0xf0], hashTreeSeed[0]) + binary.LittleEndian.PutUint32(b[0xf0:0xf4], hashTreeSeed[1]) + binary.LittleEndian.PutUint32(b[0xf4:0xf8], hashTreeSeed[2]) + binary.LittleEndian.PutUint32(b[0xf8:0xfc], hashTreeSeed[3]) b[0xfc] = byte(sb.hashVersion) @@ -617,7 +642,6 @@ func (sb *superblock) toBytes() ([]byte, error) { binary.LittleEndian.PutUint16(b[0x166:0x168], sb.multiMountPreventionInterval) binary.LittleEndian.PutUint64(b[0x168:0x170], sb.multiMountProtectionBlock) - b[0x174] = uint8(math.Log2(float64(sb.logGroupsPerFlex))) b[0x175] = sb.checksumType // only valid one is 1 diff --git a/util/bitmap/bitmap.go b/util/bitmap/bitmap.go index 84c5531..0361294 100644 --- a/util/bitmap/bitmap.go +++ b/util/bitmap/bitmap.go @@ -25,15 +25,28 @@ func FromBytes(b []byte) *Bitmap { return &bm } -// New creates a new bitmap of size bytes; it is not in bits to force the caller to have +// NewBytes creates a new bitmap of size bytes; it is not in bits to force the caller to have // a complete set -func New(bytes int) *Bitmap { +func NewBytes(nbytes int) *Bitmap { + if nbytes < 0 { + nbytes = 0 + } bm := Bitmap{ - bits: make([]byte, bytes), + bits: make([]byte, nbytes), } return &bm } +// NewBits creates a new bitmap that can address nBits entries. +// All bits are initially 0 (free). 
+func NewBits(nBits int) *Bitmap { + if nBits < 0 { + nBits = 0 + } + nBytes := (nBits + 7) / 8 + return NewBytes(nBytes) +} + // ToBytes returns raw bytes underlying the bitmap func (bm *Bitmap) ToBytes() []byte { b := make([]byte, len(bm.bits)) @@ -51,6 +64,9 @@ func (bm *Bitmap) FromBytes(b []byte) { // IsSet check if a specific bit location is set func (bm *Bitmap) IsSet(location int) (bool, error) { + if location < 0 { + return false, fmt.Errorf("location %d is negative", location) + } byteNumber, bitNumber := findBitForIndex(location) if byteNumber > len(bm.bits) { return false, fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8) @@ -61,8 +77,11 @@ func (bm *Bitmap) IsSet(location int) (bool, error) { // Clear a specific bit location func (bm *Bitmap) Clear(location int) error { + if location < 0 { + return fmt.Errorf("location %d is negative", location) + } byteNumber, bitNumber := findBitForIndex(location) - if byteNumber > len(bm.bits) { + if byteNumber >= len(bm.bits) { return fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8) } mask := byte(0x1) << bitNumber @@ -73,8 +92,11 @@ func (bm *Bitmap) Clear(location int) error { // Set a specific bit location func (bm *Bitmap) Set(location int) error { + if location < 0 { + return fmt.Errorf("location %d is negative", location) + } byteNumber, bitNumber := findBitForIndex(location) - if byteNumber > len(bm.bits) { + if byteNumber >= len(bm.bits) { return fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8) } mask := byte(0x1) << bitNumber @@ -86,29 +108,45 @@ func (bm *Bitmap) Set(location int) error { // Begins at start, so if you want to find the first free bit, pass start=1. // Returns -1 if none found. 
func (bm *Bitmap) FirstFree(start int) int { - var location = -1 - candidates := bm.bits[start/8:] - for i, b := range candidates { - // if all used, continue to next byte - if b&0xff == 0xff { + if start < 0 { + start = 0 + } + totalBits := len(bm.bits) * 8 + if start >= totalBits { + return -1 + } + // Start scanning at the relevant byte, but ensure we don't return a bit < start. + byteIdx := start / 8 + bitStart := uint8(start % 8) + + // First partial byte + b := bm.bits[byteIdx] + if b != 0xff { + for j := bitStart; j < 8; j++ { + if (b & (byte(1) << j)) == 0 { + return byteIdx*8 + int(j) + } + } + } + + // Remaining full bytes + for i := byteIdx + 1; i < len(bm.bits); i++ { + b = bm.bits[i] + if b == 0xff { continue } - // not all used, so find first bit set to 0 for j := uint8(0); j < 8; j++ { - mask := byte(0x1) << j - if b&mask != mask { - location = 8*i + int(j) - break + if (b & (byte(1) << j)) == 0 { + return i*8 + int(j) } } - break } - return location + + return -1 } // FirstSet returns location of first set bit in the bitmap func (bm *Bitmap) FirstSet() int { - var location = -1 for i, b := range bm.bits { // if all free, continue to next if b == 0x00 { @@ -116,16 +154,12 @@ func (bm *Bitmap) FirstSet() int { } // not all free, so find first bit set to 1 for j := uint8(0); j < 8; j++ { - mask := byte(0x1) << j - mask = ^mask - if b|mask != mask { - location = 8*i + (8 - int(j)) - break + if (b & (byte(1) << j)) != 0 { + return i*8 + int(j) } } - break } - return location + return -1 } // FreeList returns a slicelist of contiguous free locations by location.