diff --git a/kmod/src/Makefile.kernelcompat b/kmod/src/Makefile.kernelcompat index 403a2f668..d21b51f54 100644 --- a/kmod/src/Makefile.kernelcompat +++ b/kmod/src/Makefile.kernelcompat @@ -6,26 +6,6 @@ ccflags-y += -include $(src)/kernelcompat.h -# -# v3.10-rc6-21-gbb6f619b3a49 -# -# _readdir changes from fop->readdir() to fop->iterate() and from -# filldir(dirent) to dir_emit(ctx). -# -ifneq (,$(shell grep 'iterate.*dir_context' include/linux/fs.h)) -ccflags-y += -DKC_ITERATE_DIR_CONTEXT -endif - -# -# v3.10-rc6-23-g5f99f4e79abc -# -# Helpers including dir_emit_dots() are added in the process of -# switching dcache_readdir() from fop->readdir() to fop->iterate() -# -ifneq (,$(shell grep 'dir_emit_dots' include/linux/fs.h)) -ccflags-y += -DKC_DIR_EMIT_DOTS -endif - # # v3.18-rc2-19-gb5ae6b15bd73 # @@ -431,3 +411,26 @@ endif ifneq (,$(shell grep 'struct file.*bdev_file_open_by_path.const char.*path' include/linux/blkdev.h)) ccflags-y += -DKC_BDEV_FILE_OPEN_BY_PATH endif + +# v4.0-rc7-1796-gfe0f07d08ee3 +# +# direct-io changes modify inode_dio_done to now be called inode_dio_end +ifneq (,$(shell grep 'void inode_dio_end.struct inode' include/linux/fs.h)) +ccflags-y += -DKC_INODE_DIO_END +endif + +# +# v5.0-6476-g3d3539018d2c +# +# page fault handlers return a bitmask vm_fault_t instead +# Note: el8's header has a slightly modified prefix here +ifneq (,$(shell grep 'typedef.*__bitwise unsigned.*int vm_fault_t' include/linux/mm_types.h)) +ccflags-y += -DKC_MM_VM_FAULT_T +endif + +# v3.19-499-gd83a08db5ba6 +# +# .remap pages becomes obsolete +ifneq (,$(shell grep 'int ..remap_pages..struct vm_area_struct' include/linux/mm.h)) +ccflags-y += -DKC_MM_REMAP_PAGES +endif diff --git a/kmod/src/data.c b/kmod/src/data.c index 909167bc1..7903e8d78 100644 --- a/kmod/src/data.c +++ b/kmod/src/data.c @@ -560,7 +560,7 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock, u64 offset; int ret; - WARN_ON_ONCE(create && !inode_is_locked(inode)); + WARN_ON_ONCE(create && 
!rwsem_is_locked(&si->extent_sem)); /* make sure caller holds a cluster lock */ lock = scoutfs_per_task_get(&si->pt_data_lock); @@ -1551,13 +1551,17 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, struct super_block *sb = inode->i_sb; const u64 ino = scoutfs_ino(inode); struct scoutfs_lock *lock = NULL; + struct scoutfs_extent *info = NULL; + struct page *page = NULL; struct scoutfs_extent ext; struct scoutfs_extent cur; struct data_ext_args args; u32 last_flags; u64 iblock; u64 last; + int entries = 0; int ret; + int complete = 0; if (len == 0) { ret = 0; @@ -1568,16 +1572,11 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (ret) goto out; - inode_lock(inode); - down_read(&si->extent_sem); - - ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock); - if (ret) - goto unlock; - - args.ino = ino; - args.inode = inode; - args.lock = lock; + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto out; + } /* use a dummy extent to track */ memset(&cur, 0, sizeof(cur)); @@ -1586,48 +1585,93 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, iblock = start >> SCOUTFS_BLOCK_SM_SHIFT; last = (start + len - 1) >> SCOUTFS_BLOCK_SM_SHIFT; + args.ino = ino; + args.inode = inode; + + /* outer loop */ while (iblock <= last) { - ret = scoutfs_ext_next(sb, &data_ext_ops, &args, - iblock, 1, &ext); - if (ret < 0) { - if (ret == -ENOENT) - ret = 0; - last_flags = FIEMAP_EXTENT_LAST; + /* lock */ + inode_lock(inode); + down_read(&si->extent_sem); + + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock); + if (ret) { + up_read(&si->extent_sem); + inode_unlock(inode); break; } - trace_scoutfs_data_fiemap_extent(sb, ino, &ext); + args.lock = lock; - if (ext.start > last) { - /* not setting _LAST, it's for end of file */ - ret = 0; - break; + /* collect entries */ + info = page_address(page); + memset(info, 0, PAGE_SIZE); + while (entries < 
(PAGE_SIZE / sizeof(struct fiemap_extent)) - 1) { + ret = scoutfs_ext_next(sb, &data_ext_ops, &args, + iblock, 1, &ext); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + complete = 1; + last_flags = FIEMAP_EXTENT_LAST; + break; + } + + trace_scoutfs_data_fiemap_extent(sb, ino, &ext); + + if (ext.start > last) { + /* not setting _LAST, it's for end of file */ + ret = 0; + complete = 1; + break; + } + + if (scoutfs_ext_can_merge(&cur, &ext)) { + /* merged extents could be greater than input len */ + cur.len += ext.len; + } else { + /* fill it */ + memcpy(info, &cur, sizeof(cur)); + + entries++; + info++; + + cur = ext; + } + + iblock = ext.start + ext.len; } - if (scoutfs_ext_can_merge(&cur, &ext)) { - /* merged extents could be greater than input len */ - cur.len += ext.len; - } else { - ret = fill_extent(fieinfo, &cur, 0); + /* unlock */ + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); + up_read(&si->extent_sem); + inode_unlock(inode); + + if (ret) + break; + + /* emit entries */ + info = page_address(page); + for (; entries > 0; entries--) { + ret = fill_extent(fieinfo, info, 0); if (ret != 0) - goto unlock; - cur = ext; + goto out; + info++; } - iblock = ext.start + ext.len; + if (complete) + break; } + /* still one left, it's in cur */ if (cur.len) ret = fill_extent(fieinfo, &cur, last_flags); -unlock: - scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); - up_read(&si->extent_sem); - inode_unlock(inode); out: if (ret == 1) ret = 0; - + if (page) + __free_page(page); trace_scoutfs_data_fiemap(sb, start, len, ret); return ret; @@ -1914,6 +1958,236 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock, return ret; } +#ifdef KC_MM_VM_FAULT_T +static vm_fault_t scoutfs_data_page_mkwrite(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; +#else +static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ +#endif + struct page *page = vmf->page; + struct file *file = vma->vm_file; + struct inode *inode = 
file_inode(file); + struct scoutfs_inode_info *si = SCOUTFS_I(inode); + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *lock = NULL; + SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent); + DECLARE_DATA_WAIT(dw); + struct write_begin_data wbd; + u64 ind_seq; + loff_t pos; + loff_t size; + unsigned int len = PAGE_SIZE; + vm_fault_t ret = VM_FAULT_SIGBUS; + int err; + + pos = vmf->pgoff << PAGE_SHIFT; + + sb_start_pagefault(sb); + + err = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE, + SCOUTFS_LKF_REFRESH_INODE, inode, &lock); + if (err) { + ret = vmf_error(err); + goto out; + } + + size = i_size_read(inode); + + if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, lock)) { + /* data_version is per inode, whole file must be online */ + err = scoutfs_data_wait_check(inode, 0, size, + SEF_OFFLINE, + SCOUTFS_IOC_DWO_WRITE, + &dw, lock); + if (err != 0) { + if (err < 0) + ret = vmf_error(err); + goto out_unlock; + } + } + + + /* scoutfs_write_begin */ + memset(&wbd, 0, sizeof(wbd)); + INIT_LIST_HEAD(&wbd.ind_locks); + wbd.lock = lock; + + /* + * Start transaction before taking page locks - we want to make sure we're + * not locking a page, then waiting for trans, because writeback might race + * against it and cause a lock inversion hang - as demonstrated by both + * holetest and fsstress tests in xfstests. 
+ */ + do { + err = scoutfs_inode_index_start(sb, &ind_seq) ?: + scoutfs_inode_index_prepare(sb, &wbd.ind_locks, inode, + true) ?: + scoutfs_inode_index_try_lock_hold(sb, &wbd.ind_locks, + ind_seq, false); + } while (err > 0); + if (err < 0) { + ret = vmf_error(err); + goto out_trans; + } + + down_write(&si->extent_sem); + + if (!trylock_page(page)) { + ret = VM_FAULT_NOPAGE; + goto out_sem; + } + ret = VM_FAULT_LOCKED; + + if ((page->mapping != inode->i_mapping) || + (!PageUptodate(page)) || + (page_offset(page) > size)) { + unlock_page(page); + ret = VM_FAULT_NOPAGE; + goto out_sem; + } + + if (page->index == (size - 1) >> PAGE_SHIFT) + len = ((size - 1) & ~PAGE_MASK) + 1; + + err = __block_write_begin(page, pos, PAGE_SIZE, scoutfs_get_block); + if (err) { + ret = vmf_error(err); + unlock_page(page); + goto out_sem; + } + /* end scoutfs_write_begin */ + + /* + * We mark the page dirty already here so that when freeze is in + * progress, we are guaranteed that writeback during freezing will + * see the dirty page and writeprotect it again. 
+ */ + set_page_dirty(page); + wait_for_stable_page(page); + + /* scoutfs_write_end */ + scoutfs_inode_set_data_seq(inode); + scoutfs_inode_inc_data_version(inode); + + file_update_time(vma->vm_file); + + scoutfs_update_inode_item(inode, wbd.lock, &wbd.ind_locks); + scoutfs_inode_queue_writeback(inode); + +out_sem: + up_write(&si->extent_sem); +out_trans: + scoutfs_release_trans(sb); + scoutfs_inode_index_unlock(sb, &wbd.ind_locks); + /* end scoutfs_write_end */ + +out_unlock: + scoutfs_per_task_del(&si->pt_data_lock, &pt_ent); + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE); + +out: + sb_end_pagefault(sb); + + if (scoutfs_data_wait_found(&dw)) { + /* + * It'd be really nice to not hold the mmap_sem lock here + * before waiting for data, and then return VM_FAULT_RETRY + */ + err = scoutfs_data_wait(inode, &dw); + if (err == 0) + ret = VM_FAULT_NOPAGE; + else + ret = vmf_error(err); + } + + trace_scoutfs_data_page_mkwrite(sb, scoutfs_ino(inode), pos, (__force u32)ret); + + return ret; +} + +#ifdef KC_MM_VM_FAULT_T +static vm_fault_t scoutfs_data_filemap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; +#else +static int scoutfs_data_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#endif + struct file *file = vma->vm_file; + struct inode *inode = file_inode(file); + struct scoutfs_inode_info *si = SCOUTFS_I(inode); + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *inode_lock = NULL; + SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent); + DECLARE_DATA_WAIT(dw); + loff_t pos; + int err; + vm_fault_t ret = VM_FAULT_SIGBUS; + + pos = vmf->pgoff; + pos <<= PAGE_SHIFT; + +retry: + err = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, + SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock); + if (err < 0) + return vmf_error(err); + + if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) { + /* protect checked extents from stage/release */ + atomic_inc(&inode->i_dio_count); + + err = scoutfs_data_wait_check(inode, pos, 
PAGE_SIZE, + SEF_OFFLINE, SCOUTFS_IOC_DWO_READ, + &dw, inode_lock); + if (err != 0) { + if (err < 0) + ret = vmf_error(err); + goto out; + } + } + +#ifdef KC_MM_VM_FAULT_T + ret = filemap_fault(vmf); +#else + ret = filemap_fault(vma, vmf); +#endif + +out: + if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent)) + kc_inode_dio_end(inode); + scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ); + if (scoutfs_data_wait_found(&dw)) { + err = scoutfs_data_wait(inode, &dw); + if (err == 0) + goto retry; + + ret = VM_FAULT_RETRY; + } + + trace_scoutfs_data_filemap_fault(sb, scoutfs_ino(inode), pos, (__force u32)ret); + + return ret; +} + +static const struct vm_operations_struct scoutfs_data_file_vm_ops = { + .fault = scoutfs_data_filemap_fault, + .page_mkwrite = scoutfs_data_page_mkwrite, +#ifdef KC_MM_REMAP_PAGES + .remap_pages = generic_file_remap_pages, +#endif +}; + +static int scoutfs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); + vma->vm_ops = &scoutfs_data_file_vm_ops; + return 0; +} + const struct address_space_operations scoutfs_file_aops = { #ifdef KC_MPAGE_READ_FOLIO .dirty_folio = block_dirty_folio, @@ -1945,6 +2219,7 @@ const struct file_operations scoutfs_file_fops = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, #endif + .mmap = scoutfs_file_mmap, .unlocked_ioctl = scoutfs_ioctl, .fsync = scoutfs_file_fsync, .llseek = scoutfs_file_llseek, diff --git a/kmod/src/dir.c b/kmod/src/dir.c index 21c202748..95ba9db0c 100644 --- a/kmod/src/dir.c +++ b/kmod/src/dir.c @@ -11,11 +11,13 @@ * General Public License for more details. 
*/ #include +#include #include #include #include #include #include +#include #include "format.h" #include "file.h" @@ -434,6 +436,15 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } +/* + * Helper to make iterating through dirent ptrs aligned + */ +static inline struct scoutfs_dirent *next_aligned_dirent(struct scoutfs_dirent *dent, u8 len) +{ + return (void *)dent + + ALIGN(offsetof(struct scoutfs_dirent, name[len]), __alignof__(struct scoutfs_dirent)); +} + /* * readdir simply iterates over the dirent items for the dir inode and * uses their offset as the readdir position. @@ -441,76 +452,112 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry, * It will need to be careful not to read past the region of the dirent * hash offset keys that it has access to. */ -static int KC_DECLARE_READDIR(scoutfs_readdir, struct file *file, - void *dirent, kc_readdir_ctx_t ctx) +static int scoutfs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct scoutfs_lock *dir_lock = NULL; struct scoutfs_dirent *dent = NULL; +/* we'll store name_len in dent->__pad[0] */ +#define hacky_name_len __pad[0] struct scoutfs_key last_key; struct scoutfs_key key; + struct page *page = NULL; int name_len; u64 pos; + int entries = 0; int ret; + int complete = 0; + struct scoutfs_dirent *end; - if (!kc_dir_emit_dots(file, dirent, ctx)) + if (!dir_emit_dots(file, ctx)) return 0; - dent = alloc_dirent(SCOUTFS_NAME_LEN); - if (!dent) { + page = alloc_page(GFP_KERNEL); + if (!page) return -ENOMEM; - } + + end = page_address(page) + PAGE_SIZE; init_dirent_key(&last_key, SCOUTFS_READDIR_TYPE, scoutfs_ino(inode), SCOUTFS_DIRENT_LAST_POS, 0); - ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &dir_lock); - if (ret) - goto out; - + /* + * lock and fetch dirent items, until the page no longer fits + * a max size 
dirent (288b). Then unlock and dir_emit the ones + * we stored in the page. + */ for (;;) { - init_dirent_key(&key, SCOUTFS_READDIR_TYPE, scoutfs_ino(inode), - kc_readdir_pos(file, ctx), 0); - - ret = scoutfs_item_next(sb, &key, &last_key, dent, - dirent_bytes(SCOUTFS_NAME_LEN), - dir_lock); - if (ret < 0) { - if (ret == -ENOENT) - ret = 0; + /* lock */ + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &dir_lock); + if (ret) break; - } - name_len = ret - sizeof(struct scoutfs_dirent); - if (name_len < 1 || name_len > SCOUTFS_NAME_LEN) { - scoutfs_corruption(sb, SC_DIRENT_READDIR_NAME_LEN, - corrupt_dirent_readdir_name_len, - "dir_ino %llu pos %llu key "SK_FMT" len %d", - scoutfs_ino(inode), - kc_readdir_pos(file, ctx), - SK_ARG(&key), name_len); - ret = -EIO; - goto out; + dent = page_address(page); + pos = ctx->pos; + while (next_aligned_dirent(dent, SCOUTFS_NAME_LEN) < end) { + init_dirent_key(&key, SCOUTFS_READDIR_TYPE, scoutfs_ino(inode), + pos, 0); + + ret = scoutfs_item_next(sb, &key, &last_key, dent, + dirent_bytes(SCOUTFS_NAME_LEN), + dir_lock); + if (ret < 0) { + if (ret == -ENOENT) { + ret = 0; + complete = 1; + } + break; + } + + name_len = ret - sizeof(struct scoutfs_dirent); + dent->hacky_name_len = name_len; + if (name_len < 1 || name_len > SCOUTFS_NAME_LEN) { + scoutfs_corruption(sb, SC_DIRENT_READDIR_NAME_LEN, + corrupt_dirent_readdir_name_len, + "dir_ino %llu pos %llu key "SK_FMT" len %d", + scoutfs_ino(inode), + pos, + SK_ARG(&key), name_len); + ret = -EIO; + break; + } + + pos = le64_to_cpu(dent->pos) + 1; + + dent = next_aligned_dirent(dent, name_len); + entries++; } - pos = le64_to_cpu(key.skd_major); - kc_readdir_pos(file, ctx) = pos; + /* unlock */ + scoutfs_unlock(sb, dir_lock, SCOUTFS_LOCK_READ); - if (!kc_dir_emit(ctx, dirent, dent->name, name_len, pos, - le64_to_cpu(dent->ino), - dentry_type(dent->type))) { - ret = 0; + if (ret < 0) break; + + dent = page_address(page); + for (; entries > 0; entries--) { + ctx->pos = 
le64_to_cpu(dent->pos); + if (!dir_emit(ctx, dent->name, dent->hacky_name_len, + le64_to_cpu(dent->ino), + dentry_type(dent->type))) { + ret = 0; + goto out; + } + + dent = next_aligned_dirent(dent, dent->hacky_name_len); + + /* always advance ctx->pos past */ + ctx->pos++; } - kc_readdir_pos(file, ctx) = pos + 1; + if (complete) + break; } out: - scoutfs_unlock(sb, dir_lock, SCOUTFS_LOCK_READ); - - kfree(dent); + if (page) + __free_page(page); return ret; } @@ -1973,7 +2020,7 @@ const struct inode_operations scoutfs_symlink_iops = { }; const struct file_operations scoutfs_dir_fops = { - .KC_FOP_READDIR = scoutfs_readdir, + .iterate = scoutfs_readdir, #ifdef KC_FMODE_KABI_ITERATE .open = scoutfs_dir_open, #endif diff --git a/kmod/src/ioctl.c b/kmod/src/ioctl.c index ebde0a055..fea7aae33 100644 --- a/kmod/src/ioctl.c +++ b/kmod/src/ioctl.c @@ -58,25 +58,23 @@ * key space after we find no items in a given lock region. This is * relatively cheap because reading is going to check the segments * anyway. - * - * This is copying to userspace while holding a read lock. This is safe - * because faulting can send a request for a write lock while the read - * lock is being used. The cluster locks don't block tasks in a node, - * they match and the tasks fall back to local locking. In this case - * the spin locks around the item cache. 
*/ static long scoutfs_ioc_walk_inodes(struct file *file, unsigned long arg) { struct super_block *sb = file_inode(file)->i_sb; struct scoutfs_ioctl_walk_inodes __user *uwalk = (void __user *)arg; struct scoutfs_ioctl_walk_inodes walk; - struct scoutfs_ioctl_walk_inodes_entry ent; + struct scoutfs_ioctl_walk_inodes_entry *ent = NULL; + struct scoutfs_ioctl_walk_inodes_entry *end; struct scoutfs_key next_key; struct scoutfs_key last_key; struct scoutfs_key key; struct scoutfs_lock *lock; + struct page *page = NULL; u64 last_seq; + u64 entries = 0; int ret = 0; + int complete = 0; u32 nr = 0; u8 type; @@ -107,6 +105,10 @@ static long scoutfs_ioc_walk_inodes(struct file *file, unsigned long arg) } } + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + scoutfs_inode_init_index_key(&key, type, walk.first.major, walk.first.minor, walk.first.ino); scoutfs_inode_init_index_key(&last_key, type, walk.last.major, @@ -115,77 +117,107 @@ static long scoutfs_ioc_walk_inodes(struct file *file, unsigned long arg) /* cap nr to the max the ioctl can return to a compat task */ walk.nr_entries = min_t(u64, walk.nr_entries, INT_MAX); - ret = scoutfs_lock_inode_index(sb, SCOUTFS_LOCK_READ, type, - walk.first.major, walk.first.ino, - &lock); - if (ret < 0) - goto out; + end = page_address(page) + PAGE_SIZE; - for (nr = 0; nr < walk.nr_entries; ) { + /* outer loop */ + for (nr = 0;;) { + ent = page_address(page); + /* make sure _pad and minor are zeroed */ + memset(ent, 0, PAGE_SIZE); - ret = scoutfs_item_next(sb, &key, &last_key, NULL, 0, lock); - if (ret < 0 && ret != -ENOENT) + ret = scoutfs_lock_inode_index(sb, SCOUTFS_LOCK_READ, type, + le64_to_cpu(key.skii_major), + le64_to_cpu(key.skii_ino), + &lock); + if (ret) break; - if (ret == -ENOENT) { - - /* done if lock covers last iteration key */ - if (scoutfs_key_compare(&last_key, &lock->end) <= 0) { - ret = 0; + /* inner loop 1 */ + while (ent + 1 < end) { + ret = scoutfs_item_next(sb, &key, &last_key, NULL, 0, lock); + 
if (ret < 0 && ret != -ENOENT) break; + + if (ret == -ENOENT) { + /* done if lock covers last iteration key */ + if (scoutfs_key_compare(&last_key, &lock->end) <= 0) { + ret = 0; + complete = 1; + break; + } + + /* continue iterating after locked empty region */ + key = lock->end; + scoutfs_key_inc(&key); + + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); + /* avoid double-unlocking here after break */ + lock = NULL; + + ret = scoutfs_forest_next_hint(sb, &key, &next_key); + if (ret < 0 && ret != -ENOENT) + break; + + if (ret == -ENOENT || + scoutfs_key_compare(&next_key, &last_key) > 0) { + ret = 0; + complete = 1; + break; + } + + key = next_key; + + ret = scoutfs_lock_inode_index(sb, SCOUTFS_LOCK_READ, + type, + le64_to_cpu(key.skii_major), + le64_to_cpu(key.skii_ino), + &lock); + if (ret) + break; + + continue; } - /* continue iterating after locked empty region */ - key = lock->end; - scoutfs_key_inc(&key); + ent->major = le64_to_cpu(key.skii_major); + ent->ino = le64_to_cpu(key.skii_ino); - scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); + scoutfs_key_inc(&key); - ret = scoutfs_forest_next_hint(sb, &key, &next_key); - if (ret < 0 && ret != -ENOENT) - goto out; + ent++; + entries++; - if (ret == -ENOENT || - scoutfs_key_compare(&next_key, &last_key) > 0) { - ret = 0; - goto out; + if (nr + entries >= walk.nr_entries) { + complete = 1; + break; } + } - key = next_key; + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); + if (ret < 0) + break; - ret = scoutfs_lock_inode_index(sb, SCOUTFS_LOCK_READ, - key.sk_type, - le64_to_cpu(key.skii_major), - le64_to_cpu(key.skii_ino), - &lock); - if (ret < 0) + /* inner loop 2 */ + ent = page_address(page); + for (; entries > 0; entries--) { + if (copy_to_user((void __user *)walk.entries_ptr, ent, + sizeof(struct scoutfs_ioctl_walk_inodes_entry))) { + ret = -EFAULT; goto out; - - continue; + } + walk.entries_ptr += sizeof(struct scoutfs_ioctl_walk_inodes_entry); + ent++; + nr++; } - ent.major = le64_to_cpu(key.skii_major); - ent.minor 
= 0; - ent.ino = le64_to_cpu(key.skii_ino); - - if (copy_to_user((void __user *)walk.entries_ptr, &ent, - sizeof(ent))) { - ret = -EFAULT; + if (complete) break; - } - - nr++; - walk.entries_ptr += sizeof(ent); - - scoutfs_key_inc(&key); } - scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); - out: + if (page) + __free_page(page); if (nr > 0) ret = nr; - return ret; } @@ -1163,11 +1195,15 @@ static long scoutfs_ioc_get_allocated_inos(struct file *file, unsigned long arg) struct scoutfs_lock *lock = NULL; struct scoutfs_key key; struct scoutfs_key end; + struct page *page = NULL; u64 __user *uinos; u64 bytes; - u64 ino; + u64 *ino; + u64 *ino_end; + int entries = 0; int nr; int ret; + int complete = 0; if (!(file->f_mode & FMODE_READ)) { ret = -EBADF; @@ -1189,47 +1225,83 @@ static long scoutfs_ioc_get_allocated_inos(struct file *file, unsigned long arg) goto out; } + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto out; + } + ino_end = page_address(page) + PAGE_SIZE; + scoutfs_inode_init_key(&key, gai.start_ino); scoutfs_inode_init_key(&end, gai.start_ino | SCOUTFS_LOCK_INODE_GROUP_MASK); uinos = (void __user *)gai.inos_ptr; bytes = gai.inos_bytes; nr = 0; - ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_READ, 0, gai.start_ino, &lock); - if (ret < 0) - goto out; + for (;;) { + + ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_READ, 0, gai.start_ino, &lock); + if (ret < 0) + goto out; - while (bytes >= sizeof(*uinos)) { + ino = page_address(page); + while (ino < ino_end) { - ret = scoutfs_item_next(sb, &key, &end, NULL, 0, lock); - if (ret < 0) { - if (ret == -ENOENT) + ret = scoutfs_item_next(sb, &key, &end, NULL, 0, lock); + if (ret < 0) { + if (ret == -ENOENT) { + ret = 0; + complete = 1; + } + break; + } + + if (key.sk_zone != SCOUTFS_FS_ZONE) { ret = 0; - break; + complete = 1; + break; + } + + /* all fs items are owned by allocated inodes, and _first is always ino */ + *ino = le64_to_cpu(key._sk_first); + scoutfs_inode_init_key(&key, *ino + 1); + + ino++; + 
entries++; + nr++; + + bytes -= sizeof(*uinos); + if (bytes < sizeof(*uinos)) { + complete = 1; + break; + } + + if (nr == INT_MAX) { + complete = 1; + break; + } } - if (key.sk_zone != SCOUTFS_FS_ZONE) { - ret = 0; + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); + + if (ret < 0) break; - } - /* all fs items are owned by allocated inodes, and _first is always ino */ - ino = le64_to_cpu(key._sk_first); - if (put_user(ino, uinos)) { + ino = page_address(page); + if (copy_to_user(uinos, ino, entries * sizeof(*uinos))) { ret = -EFAULT; - break; + goto out; } - uinos++; - bytes -= sizeof(*uinos); - if (++nr == INT_MAX) - break; + uinos += entries; + entries = 0; - scoutfs_inode_init_key(&key, ino + 1); + if (complete) + break; } - - scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); out: + if (page) + __free_page(page); return ret ?: nr; } diff --git a/kmod/src/kernelcompat.h b/kmod/src/kernelcompat.h index 8c17d31ec..691db6b4b 100644 --- a/kmod/src/kernelcompat.h +++ b/kmod/src/kernelcompat.h @@ -29,50 +29,6 @@ do { \ }) #endif -#ifndef KC_ITERATE_DIR_CONTEXT -typedef filldir_t kc_readdir_ctx_t; -#define KC_DECLARE_READDIR(name, file, dirent, ctx) name(file, dirent, ctx) -#define KC_FOP_READDIR readdir -#define kc_readdir_pos(filp, ctx) (filp)->f_pos -#define kc_dir_emit_dots(file, dirent, ctx) dir_emit_dots(file, dirent, ctx) -#define kc_dir_emit(ctx, dirent, name, name_len, pos, ino, dt) \ - (ctx(dirent, name, name_len, pos, ino, dt) == 0) -#else -typedef struct dir_context * kc_readdir_ctx_t; -#define KC_DECLARE_READDIR(name, file, dirent, ctx) name(file, ctx) -#define KC_FOP_READDIR iterate -#define kc_readdir_pos(filp, ctx) (ctx)->pos -#define kc_dir_emit_dots(file, dirent, ctx) dir_emit_dots(file, ctx) -#define kc_dir_emit(ctx, dirent, name, name_len, pos, ino, dt) \ - dir_emit(ctx, name, name_len, ino, dt) -#endif - -#ifndef KC_DIR_EMIT_DOTS -/* - * Kernels before ->iterate and don't have dir_emit_dots so we give them - * one that works with the ->readdir() filldir() 
method. - */ -static inline int dir_emit_dots(struct file *file, void *dirent, - filldir_t filldir) -{ - if (file->f_pos == 0) { - if (filldir(dirent, ".", 1, 1, - file->f_path.dentry->d_inode->i_ino, DT_DIR)) - return 0; - file->f_pos = 1; - } - - if (file->f_pos == 1) { - if (filldir(dirent, "..", 2, 1, - parent_ino(file->f_path.dentry), DT_DIR)) - return 0; - file->f_pos = 2; - } - - return 1; -} -#endif - #ifdef KC_POSIX_ACL_VALID_USER_NS #define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(user_ns, acl) #else @@ -438,4 +394,20 @@ static inline int kc_tcp_sock_set_nodelay(struct socket *sock) } #endif +#ifdef KC_INODE_DIO_END +#define kc_inode_dio_end inode_dio_end +#else +#define kc_inode_dio_end inode_dio_done +#endif + +#ifndef KC_MM_VM_FAULT_T +typedef unsigned int vm_fault_t; +static inline vm_fault_t vmf_error(int err) +{ + if (err == -ENOMEM) + return VM_FAULT_OOM; + return VM_FAULT_SIGBUS; +} +#endif + #endif diff --git a/kmod/src/lock.c b/kmod/src/lock.c index 5f2800548..ce77bb516 100644 --- a/kmod/src/lock.c +++ b/kmod/src/lock.c @@ -302,6 +302,7 @@ static void lock_inc_count(unsigned int *counts, enum scoutfs_lock_mode mode) static void lock_dec_count(unsigned int *counts, enum scoutfs_lock_mode mode) { BUG_ON(mode < 0 || mode >= SCOUTFS_LOCK_NR_MODES); + BUG_ON(counts[mode] == 0); counts[mode]--; } diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h index e9c09750f..930a275a3 100644 --- a/kmod/src/scoutfs_trace.h +++ b/kmod/src/scoutfs_trace.h @@ -286,6 +286,52 @@ TRACE_EVENT(scoutfs_data_alloc_block_enter, STE_ENTRY_ARGS(ext)) ); +TRACE_EVENT(scoutfs_data_page_mkwrite, + TP_PROTO(struct super_block *sb, __u64 ino, __u64 pos, __u32 ret), + + TP_ARGS(sb, ino, pos, ret), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + __field(__u64, pos) + __field(__u32, ret) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + __entry->pos = pos; + __entry->ret = ret; + ), + + TP_printk(SCSBF" ino %llu pos %llu 
ret %u ", + SCSB_TRACE_ARGS, __entry->ino, __entry->pos, __entry->ret) +); + +TRACE_EVENT(scoutfs_data_filemap_fault, + TP_PROTO(struct super_block *sb, __u64 ino, __u64 pos, __u32 ret), + + TP_ARGS(sb, ino, pos, ret), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + __field(__u64, pos) + __field(__u32, ret) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + __entry->pos = pos; + __entry->ret = ret; + ), + + TP_printk(SCSBF" ino %llu pos %llu ret %u ", + SCSB_TRACE_ARGS, __entry->ino, __entry->pos, __entry->ret) +); + DECLARE_EVENT_CLASS(scoutfs_data_file_extent_class, TP_PROTO(struct super_block *sb, __u64 ino, struct scoutfs_extent *ext), diff --git a/tests/.gitignore b/tests/.gitignore index b19b962a1..32ad161c6 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -10,3 +10,5 @@ src/stage_tmpfile src/create_xattr_loop src/o_tmpfile_umask src/o_tmpfile_linkat +src/mmap_stress +src/mmap_validate diff --git a/tests/Makefile b/tests/Makefile index 4c61a0b37..3a2380dc2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,7 +13,9 @@ BIN := src/createmany \ src/create_xattr_loop \ src/fragmented_data_extents \ src/o_tmpfile_umask \ - src/o_tmpfile_linkat + src/o_tmpfile_linkat \ + src/mmap_stress \ + src/mmap_validate DEPS := $(wildcard src/*.d) @@ -23,8 +25,10 @@ ifneq ($(DEPS),) -include $(DEPS) endif +src/mmap_stress: LIBS+=-lpthread + $(BIN): %: %.c Makefile - gcc $(CFLAGS) -MD -MP -MF $*.d $< -o $@ + gcc $(CFLAGS) -MD -MP -MF $*.d $< -o $@ $(LIBS) .PHONY: clean clean: diff --git a/tests/golden/mmap b/tests/golden/mmap new file mode 100644 index 000000000..8d5a058e8 --- /dev/null +++ b/tests/golden/mmap @@ -0,0 +1,27 @@ +== mmap_stress +thread 0 complete +thread 1 complete +thread 2 complete +thread 3 complete +thread 4 complete +== basic mmap/read/write consistency checks +== mmap read from offline extent +0: offset: 0 length: 2 flags: O.L +extents: 1 +1 +00000200: ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea 
................ +0 +0: offset: 0 length: 2 flags: ..L +extents: 1 +== mmap write to an offline extent +0: offset: 0 length: 2 flags: O.L +extents: 1 +1 +0 +0: offset: 0 length: 2 flags: ..L +extents: 1 +00000000 ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea |................| +00000010 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 |................| +00000020 ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea |................| +00000030 +== done diff --git a/tests/golden/simple-readdir b/tests/golden/simple-readdir new file mode 100644 index 000000000..c2661939b --- /dev/null +++ b/tests/golden/simple-readdir @@ -0,0 +1,97 @@ +== create content +== readdir all +00000000: d_off: 0x00000001 d_reclen: 0x18 d_type: DT_DIR d_name: . +00000001: d_off: 0x00000002 d_reclen: 0x18 d_type: DT_DIR d_name: .. +00000002: d_off: 0x00000003 d_reclen: 0x18 d_type: DT_REG d_name: a +00000003: d_off: 0x00000004 d_reclen: 0x20 d_type: DT_REG d_name: aaaaaaaa +00000004: d_off: 0x00000005 d_reclen: 0x28 d_type: DT_REG d_name: aaaaaaaaaaaaaaa +00000005: d_off: 0x00000006 d_reclen: 0x30 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaa +00000006: d_off: 0x00000007 d_reclen: 0x38 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000007: d_off: 0x00000008 d_reclen: 0x38 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000008: d_off: 0x00000009 d_reclen: 0x40 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000009: d_off: 0x0000000a d_reclen: 0x48 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000000a: d_off: 0x0000000b d_reclen: 0x50 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000000b: d_off: 0x0000000c d_reclen: 0x58 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000000c: d_off: 0x0000000d d_reclen: 0x60 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000000d: d_off: 0x0000000e d_reclen: 0x68 
d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000000e: d_off: 0x0000000f d_reclen: 0x70 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000000f: d_off: 0x00000010 d_reclen: 0x70 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000010: d_off: 0x00000011 d_reclen: 0x78 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000011: d_off: 0x00000012 d_reclen: 0x80 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000012: d_off: 0x00000013 d_reclen: 0x88 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000013: d_off: 0x00000014 d_reclen: 0x90 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000014: d_off: 0x00000015 d_reclen: 0x98 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000015: d_off: 0x00000016 d_reclen: 0xa0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000016: d_off: 0x00000017 d_reclen: 0xa8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000017: d_off: 0x00000018 d_reclen: 0xa8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000018: d_off: 0x00000019 d_reclen: 0xb0 d_type: 
DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000019: d_off: 0x0000001a d_reclen: 0xb8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001a: d_off: 0x0000001b d_reclen: 0xc0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001b: d_off: 0x0000001c d_reclen: 0xc8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001c: d_off: 0x0000001d d_reclen: 0xd0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001d: d_off: 0x0000001e d_reclen: 0xd8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001e: d_off: 0x0000001f d_reclen: 0xe0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001f: d_off: 0x00000020 d_reclen: 0xe0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000020: d_off: 0x00000021 d_reclen: 0xe8 d_type: DT_REG d_name: 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000021: d_off: 0x00000022 d_reclen: 0xf0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000022: d_off: 0x00000023 d_reclen: 0xf8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000023: d_off: 0x00000024 d_reclen: 0x100 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000024: d_off: 0x00000025 d_reclen: 0x108 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000025: d_off: 0x00000026 d_reclen: 0x110 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +== readdir offset +00000014: d_off: 0x00000015 d_reclen: 0x98 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000015: d_off: 0x00000016 d_reclen: 0xa0 d_type: DT_REG d_name: 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000016: d_off: 0x00000017 d_reclen: 0xa8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000017: d_off: 0x00000018 d_reclen: 0xa8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000018: d_off: 0x00000019 d_reclen: 0xb0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000019: d_off: 0x0000001a d_reclen: 0xb8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001a: d_off: 0x0000001b d_reclen: 0xc0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001b: d_off: 0x0000001c d_reclen: 0xc8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001c: d_off: 0x0000001d d_reclen: 0xd0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001d: d_off: 0x0000001e d_reclen: 0xd8 d_type: DT_REG d_name: 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001e: d_off: 0x0000001f d_reclen: 0xe0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001f: d_off: 0x00000020 d_reclen: 0xe0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000020: d_off: 0x00000021 d_reclen: 0xe8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000021: d_off: 0x00000022 d_reclen: 0xf0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000022: d_off: 0x00000023 d_reclen: 0xf8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000023: d_off: 0x00000024 d_reclen: 0x100 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000024: d_off: 0x00000025 d_reclen: 0x108 d_type: DT_REG d_name: 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000025: d_off: 0x00000026 d_reclen: 0x110 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +== readdir len (bytes) +00000000: d_off: 0x00000001 d_reclen: 0x18 d_type: DT_DIR d_name: . +00000001: d_off: 0x00000002 d_reclen: 0x18 d_type: DT_DIR d_name: .. +00000002: d_off: 0x00000003 d_reclen: 0x18 d_type: DT_REG d_name: a +00000003: d_off: 0x00000004 d_reclen: 0x20 d_type: DT_REG d_name: aaaaaaaa +00000004: d_off: 0x00000005 d_reclen: 0x28 d_type: DT_REG d_name: aaaaaaaaaaaaaaa +00000005: d_off: 0x00000006 d_reclen: 0x30 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaa +00000006: d_off: 0x00000007 d_reclen: 0x38 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaa +== introduce gap +00000000: d_off: 0x00000001 d_reclen: 0x18 d_type: DT_DIR d_name: . +00000001: d_off: 0x00000002 d_reclen: 0x18 d_type: DT_DIR d_name: .. 
+00000002: d_off: 0x00000003 d_reclen: 0x18 d_type: DT_REG d_name: a +00000003: d_off: 0x00000004 d_reclen: 0x20 d_type: DT_REG d_name: aaaaaaaa +00000004: d_off: 0x00000005 d_reclen: 0x28 d_type: DT_REG d_name: aaaaaaaaaaaaaaa +00000005: d_off: 0x00000006 d_reclen: 0x30 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaa +00000006: d_off: 0x00000007 d_reclen: 0x38 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000007: d_off: 0x00000008 d_reclen: 0x38 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000008: d_off: 0x00000009 d_reclen: 0x40 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000009: d_off: 0x00000014 d_reclen: 0x48 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000014: d_off: 0x00000015 d_reclen: 0x98 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000015: d_off: 0x00000016 d_reclen: 0xa0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000016: d_off: 0x00000017 d_reclen: 0xa8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000017: d_off: 0x00000018 d_reclen: 0xa8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000018: d_off: 0x00000019 d_reclen: 0xb0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000019: d_off: 0x0000001a d_reclen: 0xb8 d_type: DT_REG d_name: 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001a: d_off: 0x0000001b d_reclen: 0xc0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001b: d_off: 0x0000001c d_reclen: 0xc8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001c: d_off: 0x0000001d d_reclen: 0xd0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001d: d_off: 0x0000001e d_reclen: 0xd8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001e: d_off: 0x0000001f d_reclen: 0xe0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +0000001f: d_off: 0x00000020 d_reclen: 0xe0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000020: d_off: 0x00000021 d_reclen: 0xe8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000021: d_off: 0x00000022 
d_reclen: 0xf0 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000022: d_off: 0x00000023 d_reclen: 0xf8 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000023: d_off: 0x00000024 d_reclen: 0x100 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000024: d_off: 0x00000025 d_reclen: 0x108 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +00000025: d_off: 0x00000026 d_reclen: 0x110 d_type: DT_REG d_name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +== cleanup diff --git a/tests/golden/xfstests b/tests/golden/xfstests index 8247aa145..c4032ca94 100644 --- a/tests/golden/xfstests +++ b/tests/golden/xfstests @@ -22,6 +22,8 @@ generic/024 generic/025 generic/026 generic/028 +generic/029 +generic/030 generic/031 generic/032 generic/033 @@ -53,6 +55,7 @@ generic/073 generic/076 generic/078 generic/079 +generic/080 generic/081 generic/082 generic/084 @@ -81,10 +84,12 @@ generic/116 generic/117 generic/118 generic/119 +generic/120 generic/121 generic/122 generic/123 
generic/124 +generic/126 generic/128 generic/129 generic/130 @@ -95,6 +100,7 @@ generic/136 generic/138 generic/139 generic/140 +generic/141 generic/142 generic/143 generic/144 @@ -153,6 +159,7 @@ generic/210 generic/211 generic/212 generic/214 +generic/215 generic/216 generic/217 generic/218 @@ -173,6 +180,9 @@ generic/238 generic/240 generic/244 generic/245 +generic/246 +generic/247 +generic/248 generic/249 generic/250 generic/252 @@ -231,6 +241,7 @@ generic/317 generic/319 generic/322 generic/324 +generic/325 generic/326 generic/327 generic/328 @@ -244,6 +255,7 @@ generic/337 generic/341 generic/342 generic/343 +generic/346 generic/348 generic/353 generic/355 @@ -305,7 +317,9 @@ generic/424 generic/425 generic/426 generic/427 +generic/428 generic/436 +generic/437 generic/439 generic/440 generic/443 @@ -315,6 +329,7 @@ generic/448 generic/449 generic/450 generic/451 +generic/452 generic/453 generic/454 generic/456 @@ -438,6 +453,7 @@ generic/610 generic/611 generic/612 generic/613 +generic/614 generic/618 generic/621 generic/623 @@ -451,6 +467,7 @@ generic/632 generic/634 generic/635 generic/637 +generic/638 generic/639 generic/640 generic/644 @@ -862,4 +879,4 @@ generic/688 generic/689 shared/002 shared/032 -Passed all 495 tests +Passed all 512 tests diff --git a/tests/sequence b/tests/sequence index 78001e59d..18eff7cff 100644 --- a/tests/sequence +++ b/tests/sequence @@ -6,6 +6,7 @@ inode-items-updated.sh simple-inode-index.sh simple-staging.sh simple-release-extents.sh +simple-readdir.sh get-referring-entries.sh fallocate.sh basic-truncate.sh @@ -17,6 +18,7 @@ projects.sh large-fragmented-free.sh format-version-forward-back.sh enospc.sh +mmap.sh srch-safe-merge-pos.sh srch-basic-functionality.sh simple-xattr-unit.sh diff --git a/tests/src/mmap_stress.c b/tests/src/mmap_stress.c new file mode 100644 index 000000000..94a414844 --- /dev/null +++ b/tests/src/mmap_stress.c @@ -0,0 +1,181 @@ +#define _GNU_SOURCE +/* + * mmap() stress test for scoutfs + * + * This 
test exercises the scoutfs kernel module's locking by
+ * repeatedly reading/writing using mmap and pread/pwrite calls
+ * across 5 clients (mounts).
+ *
+ * Each thread operates on a single fd (one per client), and performs
+ * operations in a random order on the file.
+ *
+ * The goal is to ensure that locking between _page_mkwrite vfs
+ * calls and the normal read/write paths does not cause deadlocks.
+ *
+ * There is no content validation performed. All that is done is
+ * ensure that the program continues without errors.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static int size = 0;
+static int count = 0; /* XXX make this duration instead */
+
+struct thread_info {
+	int nr;
+	int fd;
+};
+/* Worker: runs `count` randomly chosen pread/pwrite/mmap ops of `size` bytes on tinfo->fd. */
+static void *run_test_func(void *ptr)
+{
+	void *buf = NULL;
+	char *addr = NULL;
+	struct thread_info *tinfo = ptr;
+	int c = 0;
+	int fd;
+	ssize_t read, written, ret;
+	int preads = 0, pwrites = 0, mreads = 0, mwrites = 0;
+
+	fd = tinfo->fd;
+
+	if ((errno = posix_memalign(&buf, 4096, size)) != 0) { /* returns the error, does not set errno */
+		perror("posix_memalign");
+		exit(-1);
+	}
+
+	addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(-1);
+	}
+
+	usleep(100000); /* 0.1sec to allow all threads to start roughly at the same time */
+
+	for (;;) {
+		if (++c > count)
+			break;
+
+		switch (rand() % 4) {
+		case 0: /* pread */
+			preads++;
+			for (read = 0; read < size;) {
+				ret = pread(fd, (char *)buf + read, size - read, read);
+				if (ret <= 0) { /* 0 is EOF: retrying would spin forever */
+					fprintf(stderr, "pread: %s\n", ret ? strerror(errno) : "unexpected EOF");
+					exit(-1);
+				}
+				read += ret;
+			}
+			break;
+		case 1: /* pwrite */
+			pwrites++;
+			memset(buf, (char)(c & 0xff), size);
+			for (written = 0; written < size;) {
+				ret = pwrite(fd, (char *)buf + written, size - written, written);
+				if (ret < 0) {
+					perror("pwrite");
+					exit(-1);
+				}
+				written += ret;
+			}
+			break;
+		case 2: /* mmap read */
+			mreads++;
+			memcpy(buf, addr, size); /* noerr */
+			break;
+		case 3: /* mmap write */
+			mwrites++;
+			memset(buf, (char)(c & 0xff), size);
+			memcpy(addr, buf, size); /* noerr */
+			break;
+		}
+	}
+
+	munmap(addr, size);
+
+	free(buf);
+
+	printf("thread %d complete: preads %d pwrites %d mreads %d mwrites %d\n", tinfo->nr,
+	       preads, pwrites, mreads, mwrites); /* argument order must match the labels */
+
+	return NULL;
+}
+/* Usage: size count file1..file5. Creates file1, opens the other four paths, runs one worker per fd. */
+int main(int argc, char **argv)
+{
+	pthread_t thread[5];
+	struct thread_info tinfo[5];
+	int fd[5];
+	int ret;
+	int i;
+
+	if (argc != 8) {
+		fprintf(stderr, "%s requires 7 arguments - size count file1 file2 file3 file4 file5\n", argv[0]);
+		exit(-1);
+	}
+
+	size = atoi(argv[1]);
+	if (size <= 0) {
+		fprintf(stderr, "invalid size, must be greater than 0\n");
+		exit(-1);
+	}
+
+	count = atoi(argv[2]);
+	if (count < 0) {
+		fprintf(stderr, "invalid count, must not be negative\n");
+		exit(-1);
+	}
+
+	/* create and truncate one fd */
+	fd[0] = open(argv[3], O_RDWR | O_CREAT | O_TRUNC, 00644);
+	if (fd[0] < 0) {
+		perror("open");
+		exit(-1);
+	}
+
+	/* make it the test size; posix_fallocate returns the error, errno is not set */
+	if ((errno = posix_fallocate(fd[0], 0, size)) != 0) {
+		perror("posix_fallocate");
+		exit(-1);
+	}
+
+	/* now open the rest of the fds */
+	for (i = 1; i < 5; i++) {
+		fd[i] = open(argv[3+i], O_RDWR);
+		if (fd[i] < 0) {
+			perror("open");
+			exit(-1);
+		}
+	}
+
+	/* start threads */
+	for (i = 0; i < 5; i++) {
+		tinfo[i].fd = fd[i];
+		tinfo[i].nr = i;
+		ret = pthread_create(&thread[i], NULL, run_test_func, (void*)&tinfo[i]);
+
+		if (ret) { /* pthread_create returns the error number directly */
+			fprintf(stderr, "pthread_create: %s\n", strerror(ret));
+			exit(-1);
+		}
+	}
+
+	/* wait for complete */
+	for (i = 0; i < 5; i++)
+		pthread_join(thread[i], NULL);
+
+	for (i = 0; i < 5; i++)
+		close(fd[i]);
+
+	exit(0);
+}
diff --git a/tests/src/mmap_validate.c b/tests/src/mmap_validate.c
new file mode 100644
index 000000000..40f7435de
--- /dev/null
+++ b/tests/src/mmap_validate.c
@@ -0,0 +1,159 @@
+#define _GNU_SOURCE
+/*
+ * mmap() content consistency checking for scoutfs
+ *
+ * This test program validates that content from memory mappings
+ * is consistent across clients, whether written/read with mmap or
+ * normal writes/reads.
+ *
+ * One side (read or write) will always be memory mapped. Both
+ * sides are memory mapped one third of the time (33%).
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+static int count = 0;
+static int size = 0;
+/* Writes a per-iteration pattern through fd1 and checks it reads back identically through fd2. */
+static void run_test_func(int fd1, int fd2)
+{
+	void *buf1 = NULL;
+	void *buf2 = NULL;
+	char *addr1 = NULL;
+	char *addr2 = NULL;
+	int c = 0;
+	ssize_t read, written, ret;
+
+	/* buffers for both sides to compare; posix_memalign returns the error, errno is not set */
+	if ((errno = posix_memalign(&buf1, 4096, size)) != 0) {
+		perror("posix_memalign buf1");
+		exit(-1);
+	}
+
+	if ((errno = posix_memalign(&buf2, 4096, size)) != 0) {
+		perror("posix_memalign buf2");
+		exit(-1);
+	}
+
+	/* memory maps for both sides */
+	addr1 = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd1, 0);
+	if (addr1 == MAP_FAILED) {
+		perror("mmap1");
+		exit(-1);
+	}
+
+	addr2 = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd2, 0);
+	if (addr2 == MAP_FAILED) {
+		perror("mmap2");
+		exit(-1);
+	}
+
+	for (;;) {
+		if (++c > count) /* stop after count iterations */
+			break;
+
+		/* put a pattern in buf1 */
+		memset(buf1, c & 0xff, size);
+
+		/* pwrite or mmap write from buf1 */
+		switch (c % 3) {
+		case 0: /* pwrite */
+			for (written = 0; written < size;) {
+				ret = pwrite(fd1, (char *)buf1 + written, size - written, written);
+				if (ret < 0) {
+					perror("pwrite");
+					exit(-1);
+				}
+				written += ret;
+			}
+			break;
+		default: /* mmap write */
+			memcpy(addr1, buf1, size);
+			break;
+		}
+
+		/* pread or mmap read to buf2 */
+		switch (c % 3) {
+		case 2: /* pread */
+			for (read = 0; read < size;) {
+				ret = pread(fd2, (char *)buf2 + read, size - read, read);
+				if (ret <= 0) { /* 0 is EOF: retrying would spin forever */
+					fprintf(stderr, "pread: %s\n", ret ? strerror(errno) : "unexpected EOF");
+					exit(-1);
+				}
+				read += ret;
+			}
+			break;
+		default: /* mmap read */
+			memcpy(buf2, addr2, size);
+			break;
+		}
+
+		/* compare bufs */
+		if (memcmp(buf1, buf2, size) != 0) {
+			fprintf(stderr, "memcmp() failed\n");
+			exit(-1);
+		}
+	}
+
+	munmap(addr1, size);
+	munmap(addr2, size);
+
+	free(buf1);
+	free(buf2);
+}
+/* Usage: size count file1 file2. Creates file1, opens file2, then runs the consistency loop. */
+int main(int argc, char **argv)
+{
+	int fd[2]; /* two descriptors are used below: fd[0] and fd[1] */
+
+	if (argc != 5) {
+		fprintf(stderr, "%s requires 4 arguments - size count file1 file2\n", argv[0]);
+		exit(-1);
+	}
+
+	size = atoi(argv[1]);
+	if (size <= 0) {
+		fprintf(stderr, "invalid size, must be greater than 0\n");
+		exit(-1);
+	}
+
+	count = atoi(argv[2]);
+	if (count < 3) { /* 3 iterations cover every c % 3 write/read pairing */
+		fprintf(stderr, "invalid count, must be at least 3\n");
+		exit(-1);
+	}
+
+	/* create and truncate one fd */
+	fd[0] = open(argv[3], O_RDWR | O_CREAT | O_TRUNC, 00644);
+	if (fd[0] < 0) {
+		perror("open");
+		exit(-1);
+	}
+
+	fd[1] = open(argv[4], O_RDWR);
+	if (fd[1] < 0) {
+		perror("open");
+		exit(-1);
+	}
+
+	/* make it the test size; posix_fallocate returns the error, errno is not set */
+	if ((errno = posix_fallocate(fd[0], 0, size)) != 0) {
+		perror("posix_fallocate");
+		exit(-1);
+	}
+
+	/* run the test function */
+	run_test_func(fd[0], fd[1]);
+
+	close(fd[0]);
+	close(fd[1]);
+
+	exit(0);
+}
diff --git a/tests/tests/mmap.sh b/tests/tests/mmap.sh
new file mode 100644
index 000000000..bf465ce93
--- /dev/null
+++ b/tests/tests/mmap.sh
@@ -0,0 +1,54 @@
+#
+# test mmap() and normal read/write consistency between different nodes
+#
+
+t_require_commands mmap_stress mmap_validate scoutfs xfs_io
+
+echo "== mmap_stress"
+mmap_stress 8192 2000 "$T_D0/mmap_stress" "$T_D1/mmap_stress" "$T_D2/mmap_stress" "$T_D3/mmap_stress" "$T_D4/mmap_stress" | sed 's/:.*//g' | sort
+
+echo "== basic mmap/read/write consistency checks"
+mmap_validate 256 1000 "$T_D0/mmap_val1" "$T_D1/mmap_val1"
+mmap_validate 8192 1000 "$T_D0/mmap_val2" "$T_D1/mmap_val2"
+mmap_validate 88400 1000 "$T_D0/mmap_val3" "$T_D1/mmap_val3"
+
+echo "== mmap read from offline extent"
+F="$T_D0/mmap-offline"
+touch "$F"
+xfs_io -c "pwrite -S 0xEA 0 8192" "$F" > /dev/null
+cp "$F" "${F}-stage"
+vers=$(scoutfs stat -s data_version "$F")
+scoutfs release "$F" -V "$vers" -o 0 -l 8192
+scoutfs get-fiemap -L "$F"
+xfs_io -c "mmap -rwx 0 8192" \
+	-c "mread -v 512 16" "$F" &
+sleep 1
+# should be 1 - data waiting
+jobs | wc -l
+scoutfs stage "${F}-stage" "$F" -V "$vers" -o 0 -l 8192
+# xfs_io thread will output
16 bytes of read data
+sleep 1
+# should be 0 - no more waiting jobs, xfs_io should have exited
+jobs | wc -l
+scoutfs get-fiemap -L "$F"
+
+echo "== mmap write to an offline extent"
+# reuse the same file
+scoutfs release "$F" -V "$vers" -o 0 -l 8192
+scoutfs get-fiemap -L "$F"
+xfs_io -c "mmap -rwx 0 8192" \
+	-c "mwrite -S 0x11 528 16" "$F" &
+sleep 1
+# should be 1 job waiting
+jobs | wc -l
+scoutfs stage "${F}-stage" "$F" -V "$vers" -o 0 -l 8192
+# no output here from write
+sleep 1
+# should be 0 - no more waiting jobs, xfs_io should have exited
+jobs | wc -l
+scoutfs get-fiemap -L "$F"
+# read back contents to assure write changed the file
+dd status=none if="$F" bs=1 count=48 skip=512 | hexdump -C
+
+echo "== done"
+t_pass
diff --git a/tests/tests/simple-readdir.sh b/tests/tests/simple-readdir.sh
new file mode 100644
index 000000000..03ccb3be2
--- /dev/null
+++ b/tests/tests/simple-readdir.sh
@@ -0,0 +1,37 @@
+#
+# verify d_off output of xfs_io is consistent.
+#
+
+t_require_commands xfs_io
+
+filt()
+{
+	grep d_off | cut -d ' ' -f 1,4-
+}
+
+echo "== create content"
+for s in $(seq 1 7 250); do
+	f=$(printf '%*s' "$s" | tr ' ' 'a')
+	touch "${T_D0}/$f"
+done
+
+echo "== readdir all"
+xfs_io -c "readdir -v" "$T_D0" | filt
+
+echo "== readdir offset"
+xfs_io -c "readdir -v -o 20" "$T_D0" | filt
+
+echo "== readdir len (bytes)"
+xfs_io -c "readdir -v -l 193" "$T_D0" | filt
+
+echo "== introduce gap"
+for s in $(seq 57 7 120); do
+	f=$(printf '%*s' "$s" | tr ' ' 'a')
+	rm -f "${T_D0}/$f"
+done
+xfs_io -c "readdir -v" "$T_D0" | filt
+
+echo "== cleanup"
+rm -rf "$T_D0"
+
+t_pass
diff --git a/tests/tests/xfstests.sh b/tests/tests/xfstests.sh
index f2850a62d..b0ae44cf0 100644
--- a/tests/tests/xfstests.sh
+++ b/tests/tests/xfstests.sh
@@ -65,26 +65,14 @@ EOF
 
 cat << EOF > local.exclude
 generic/003 # missing atime update in buffered read
-generic/029 # mmap missing
-generic/030 # mmap missing
 generic/075 # file content mismatch failures (fds, etc)
-generic/080 # mmap missing
 generic/103 # enospc causes trans commit failures
 generic/108 # mount fails on failing device?
 generic/112 # file content mismatch failures (fds, etc)
-generic/120 # (can't exec 'cause no mmap)
-generic/126 # (can't exec 'cause no mmap)
-generic/141 # mmap missing
 generic/213 # enospc causes trans commit failures
-generic/215 # mmap missing
-generic/246 # mmap missing
-generic/247 # mmap missing
-generic/248 # mmap missing
 generic/318 # can't support user namespaces until v5.11
 generic/321 # requires selinux enabled for '+' in ls?
-generic/325 # mmap missing
 generic/338 # BUG_ON update inode error handling
-generic/346 # mmap missing
 generic/347 # _dmthin_mount doesn't work?
 generic/356 # swap
 generic/357 # swap
@@ -92,16 +80,13 @@ generic/409 # bind mounts not scripted yet
 generic/410 # bind mounts not scripted yet
 generic/411 # bind mounts not scripted yet
 generic/423 # symlink inode size is strlen() + 1 on scoutfs
-generic/428 # mmap missing
 generic/430 # xfs_io copy_range missing in el7
 generic/431 # xfs_io copy_range missing in el7
 generic/432 # xfs_io copy_range missing in el7
 generic/433 # xfs_io copy_range missing in el7
 generic/434 # xfs_io copy_range missing in el7
-generic/437 # mmap missing
 generic/441 # dm-mapper
 generic/444 # el9's posix_acl_update_mode is buggy ?
-generic/452 # exec test - no mmap
 generic/467 # open_by_handle ESTALE
 generic/472 # swap
 generic/484 # dm-mapper
@@ -118,11 +103,9 @@ generic/565 # xfs_io copy_range missing in el7
 generic/568 # falloc not resulting in block count increase
 generic/569 # swap
 generic/570 # swap
-generic/614 # mmap missing
 generic/620 # dm-hugedisk
-generic/633 # mmap, id-mapped mounts missing in el7
+generic/633 # id-mapped mounts missing in el7
 generic/636 # swap
-generic/638 # mmap missing
 generic/641 # swap
 generic/643 # swap
 EOF