From 16b8c1cd869ecd7f8f3aa9583191771515ca1f71 Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Wed, 14 Jan 2026 12:39:49 +0100 Subject: [PATCH 1/2] Revert "fuse: check attributes staleness on fuse_iget()" This reverts commit fabde099e332399c6d7d315f989dd0ecf5a2b384. --- fs/fuse/dir.c | 11 +++++----- fs/fuse/fuse_i.h | 14 ++---------- fs/fuse/inode.c | 56 ++++++++++------------------------------------- fs/fuse/readdir.c | 15 +++++-------- 4 files changed, 25 insertions(+), 71 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index db8046716077fa..47e00ecbf3ec7c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -368,7 +368,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name struct fuse_mount *fm = get_fuse_mount_super(sb); FUSE_ARGS(args); struct fuse_forget_link *forget; - u64 attr_version, evict_ctr; + u64 attr_version; int err; *inode = NULL; @@ -383,7 +383,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name goto out; attr_version = fuse_get_attr_version(fm->fc); - evict_ctr = fuse_get_evict_ctr(fm->fc); fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); err = fuse_simple_request(fm, &args); @@ -401,7 +400,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, &outarg->attr, ATTR_TIMEOUT(outarg), - attr_version, evict_ctr); + attr_version); err = -ENOMEM; if (!*inode) { fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); @@ -688,7 +687,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ff->nodeid = outentry.nodeid; ff->open_flags = outopen.open_flags; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, - &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0); + &outentry.attr, ATTR_TIMEOUT(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(NULL, ff, flags); @@ -818,7 +817,7 @@ static int create_new_entry(struct fuse_mount *fm, 
struct fuse_args *args, goto out_put_forget_req; inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0); + &outarg.attr, ATTR_TIMEOUT(&outarg), 0); if (!inode) { fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); return -ENOMEM; @@ -2007,7 +2006,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, fuse_change_attributes_common(inode, &outarg.attr, NULL, ATTR_TIMEOUT(&outarg), - fuse_get_cache_mask(inode), 0); + fuse_get_cache_mask(inode)); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ if (!is_wb || is_truncate) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4f0474e2e31def..cfd778fe18f1da 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -896,9 +896,6 @@ struct fuse_conn { /** Version counter for attribute changes */ atomic64_t attr_version; - /** Version counter for evict inode */ - atomic64_t evict_ctr; - /* maximum file name length */ u32 name_max; @@ -1013,11 +1010,6 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } -static inline u64 fuse_get_evict_ctr(struct fuse_conn *fc) -{ - return atomic64_read(&fc->evict_ctr); -} - static inline bool fuse_stale_inode(const struct inode *inode, int generation, struct fuse_attr *attr) { @@ -1085,8 +1077,7 @@ extern const struct dentry_operations fuse_root_dentry_operations; */ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version, - u64 evict_ctr); + u64 attr_valid, u64 attr_version); int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode); @@ -1184,8 +1175,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, - u64 attr_valid, u32 cache_mask, - u64 evict_ctr); + u64 
attr_valid, u32 cache_mask); u32 fuse_get_cache_mask(struct inode *inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 798e69c40a5c3f..82e818b9a9cef8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -170,14 +170,6 @@ static void fuse_evict_inode(struct inode *inode) fuse_cleanup_submount_lookup(fc, fi->submount_lookup); fi->submount_lookup = NULL; } - /* - * Evict of non-deleted inode may race with outstanding - * LOOKUP/READDIRPLUS requests and result in inconsistency when - * the request finishes. Deal with that here by bumping a - * counter that can be compared to the starting value. - */ - if (inode->i_nlink > 0) - atomic64_inc(&fc->evict_ctr); } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); @@ -211,30 +203,17 @@ static ino_t fuse_squash_ino(u64 ino64) void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, - u64 attr_valid, u32 cache_mask, - u64 evict_ctr) + u64 attr_valid, u32 cache_mask) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); lockdep_assert_held(&fi->lock); - /* - * Clear basic stats from invalid mask. - * - * Don't do this if this is coming from a fuse_iget() call and there - * might have been a racing evict which would've invalidated the result - * if the attr_version would've been preserved. 
- * - * !evict_ctr -> this is create - * fi->attr_version != 0 -> this is not a new inode - * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request - */ - if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc)) - set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); - fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; + /* Clear basic stats from invalid mask */ + set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); @@ -313,9 +292,9 @@ u32 fuse_get_cache_mask(struct inode *inode) return STATX_MTIME | STATX_CTIME | STATX_SIZE; } -static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr, - struct fuse_statx *sx, u64 attr_valid, - u64 attr_version, u64 evict_ctr) +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, + u64 attr_valid, u64 attr_version) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -349,8 +328,7 @@ static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr } old_mtime = inode_get_mtime(inode); - fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask, - evict_ctr); + fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask); oldsize = inode->i_size; /* @@ -391,13 +369,6 @@ static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr fuse_dax_dontcache(inode, attr->flags); } -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - struct fuse_statx *sx, u64 attr_valid, - u64 attr_version) -{ - fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0); -} - static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl, u64 nodeid) { @@ -452,8 +423,7 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp) struct inode *fuse_iget(struct super_block 
*sb, u64 nodeid, int generation, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version, - u64 evict_ctr) + u64 attr_valid, u64 attr_version) { struct inode *inode; struct fuse_inode *fi; @@ -514,8 +484,8 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, fi->nlookup++; spin_unlock(&fi->lock); done: - fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version, - evict_ctr); + fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); + return inode; } @@ -1033,7 +1003,6 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, fc->compound_open_getattr = 1; atomic64_set(&fc->attr_version, 1); - atomic64_set(&fc->evict_ctr, 1); get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); @@ -1093,7 +1062,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) attr.mode = mode; attr.ino = FUSE_ROOT_ID; attr.nlink = 1; - return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0); + return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0); } struct fuse_inode_handle { @@ -1721,8 +1690,7 @@ static int fuse_fill_super_submount(struct super_block *sb, return -ENOMEM; fuse_fill_attr_from_inode(&root_attr, parent_fi); - root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0, - fuse_get_evict_ctr(fm->fc)); + root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); /* * This inode is just a duplicate, so it is not looked up and * its nlookup should not be incremented. 
fuse_iget() does diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index ef231156599099..8ccdcd51606c94 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -149,7 +149,7 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, static int fuse_direntplus_link(struct file *file, struct fuse_direntplus *direntplus, - u64 attr_version, u64 evict_ctr) + u64 attr_version) { struct fuse_entry_out *o = &direntplus->entry_out; struct fuse_dirent *dirent = &direntplus->dirent; @@ -233,7 +233,7 @@ static int fuse_direntplus_link(struct file *file, } else { inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, &o->attr, ATTR_TIMEOUT(o), - attr_version, evict_ctr); + attr_version); if (!inode) inode = ERR_PTR(-ENOMEM); @@ -284,8 +284,7 @@ static void fuse_force_forget(struct file *file, u64 nodeid) } static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, - struct dir_context *ctx, u64 attr_version, - u64 evict_ctr) + struct dir_context *ctx, u64 attr_version) { struct fuse_direntplus *direntplus; struct fuse_dirent *dirent; @@ -320,7 +319,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, buf += reclen; nbytes -= reclen; - ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr); + ret = fuse_direntplus_link(file, direntplus, attr_version); if (ret) fuse_force_forget(file, direntplus->entry_out.nodeid); } @@ -339,7 +338,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) struct fuse_args *args = &ia.ap.args; void *buf; size_t bufsize = fc->max_pages << PAGE_SHIFT; - u64 attr_version = 0, evict_ctr = 0; + u64 attr_version = 0; bool locked; buf = kvmalloc(bufsize, GFP_KERNEL); @@ -351,7 +350,6 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) plus = fuse_use_readdirplus(inode, ctx); if (plus) { attr_version = fuse_get_attr_version(fm->fc); - evict_ctr = fuse_get_evict_ctr(fm->fc); fuse_read_args_fill(&ia, file, ctx->pos, bufsize, 
FUSE_READDIRPLUS); } else { fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR); @@ -366,8 +364,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) if (ff->open_flags & FOPEN_CACHE_DIR) fuse_readdir_cache_end(file, ctx->pos); } else if (plus) { - res = parse_dirplusfile(buf, res, file, ctx, attr_version, - evict_ctr); + res = parse_dirplusfile(buf, res, file, ctx, attr_version); } else { res = parse_dirfile(buf, res, file, ctx); } From 55d22751e5b2cb9533aab60c4caeb3ae1346d5ca Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Wed, 14 Jan 2026 13:59:56 +0100 Subject: [PATCH 2/2] fuse: fix race between fuse_iget() and fuse_reverse_inval_inode() There is a race condition where fuse_reverse_inval_inode() can be called while fuse_iget() is still initializing an inode. This can lead to the inode's attributes being invalidated before they are properly set up, causing inconsistent state. The race occurs when: 1. Thread A calls fuse_iget() and starts initializing an inode 2. Thread B calls fuse_reverse_inval_inode() for the same inode 3. Thread B updates attr_version, invalidating the inode 4. 
Thread A completes initialization with stale attributes This patch fixes the race by introducing a delayed invalidation mechanism: - Use attr_version == 1 as a sentinel value to indicate that an invalidation occurred during fuse_iget() initialization - In fuse_change_attributes_common(), skip updating attr_version if it equals 1, allowing fuse_iget() to detect the pending invalidation - In fuse_reverse_inval_inode_common(), if attr_version <= 1, set it to 1 and return early, deferring the invalidation - In fuse_iget(), after calling fuse_change_attributes(), check if attr_version == 1 and perform the delayed invalidation The function fuse_reverse_inval_inode() is refactored to extract the common invalidation logic into fuse_reverse_inval_inode_common(). Since fc->attr_version is initialized to 1, normal inode attr_version values will be 0 (for new inodes before first update) or >= 2, making the value 1 safe to use as a sentinel. Signed-off-by: Guang Yuan Wu Applied-by: Horst Birthelmer --- fs/fuse/inode.c | 52 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 82e818b9a9cef8..3fb40c9ed626a4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -210,7 +210,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, lockdep_assert_held(&fi->lock); - fi->attr_version = atomic64_inc_return(&fc->attr_version); + if (fi->attr_version != 1) /* fuse_iget() handles attr_version == 1 */ + fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; /* Clear basic stats from invalid mask */ set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); @@ -421,6 +422,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp) return 0; } +int fuse_reverse_inval_inode_common(struct fuse_conn *fc, struct inode *inode, + loff_t offset, loff_t len); struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct 
fuse_attr *attr, u64 attr_valid, u64 attr_version) @@ -486,6 +489,13 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, done: fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); + spin_lock(&fi->lock); + if (fi->attr_version == 1) { + spin_unlock(&fi->lock); + fuse_reverse_inval_inode_common(fc, inode, 0, 0); + } else + spin_unlock(&fi->lock); + return inode; } @@ -550,20 +560,30 @@ static void fuse_invalidate_inode_entry(struct inode *inode) } } -int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, - loff_t offset, loff_t len) +int fuse_reverse_inval_inode_common(struct fuse_conn *fc, struct inode *inode, + loff_t offset, loff_t len) { struct fuse_inode *fi; - struct inode *inode; pgoff_t pg_start; pgoff_t pg_end; - inode = fuse_ilookup(fc, nodeid, NULL); - if (!inode) - return -ENOENT; - fi = get_fuse_inode(inode); spin_lock(&fi->lock); + + if (fi->attr_version <= 1) { + /* + * attr_version <= 1 means fuse_iget() has not completed yet. + * Skip the inode invalidation here and defer it to fuse_iget(), + * after its call to fuse_change_attributes(). 
+ * fc->attr_version is initialized to 1, so fi->attr_version is + * normally 0 or >= 2; the value 1 is used to record that a + * "delayed" inode invalidation is pending. + */ + fi->attr_version = 1; + spin_unlock(&fi->lock); + return 0; + } + fi->attr_version = atomic64_inc_return(&fc->attr_version); spin_unlock(&fi->lock); @@ -594,10 +614,24 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, invalidate_inode_pages2_range(inode->i_mapping, pg_start, pg_end); } - iput(inode); return 0; } +int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, + loff_t offset, loff_t len) +{ + struct inode *inode; + int ret; + + inode = fuse_ilookup(fc, nodeid, NULL); + if (!inode) + return -ENOENT; + + ret = fuse_reverse_inval_inode_common(fc, inode, offset, len); + iput(inode); + return ret; +} + bool fuse_lock_inode(struct inode *inode) { bool locked = false;