diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8a6cc6ef2594e..fef0511914dcc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1727,10 +1727,13 @@ struct ext4_sb_info { const char *s_last_error_func; time64_t s_last_error_time; /* - * If we are in a context where we cannot update error information in - * the on-disk superblock, we queue this work to do it. + * If we are in a context where we cannot update the on-disk + * superblock, we queue the work here. This is used to update + * the error information in the superblock, and for periodic + * updates of the superblock called from the commit callback + * function. */ - struct work_struct s_error_work; + struct work_struct s_sb_upd_work; /* Ext4 fast commit sub transaction ID */ atomic_t s_fc_subtid; @@ -1800,7 +1803,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) enum { EXT4_MF_MNTDIR_SAMPLED, EXT4_MF_FS_ABORTED, /* Fatal error detected */ - EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ + EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ + EXT4_MF_JOURNAL_DESTROY /* Journal is in process of destroying */ }; static inline void ext4_set_mount_flag(struct super_block *sb, int bit) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index db2ae4a2b38d8..8e274680cba51 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -513,4 +513,33 @@ static inline int ext4_should_dioread_nolock(struct inode *inode) return 1; } +/* + * Pass journal explicitly as it may not be cached in the sbi->s_journal in some + * cases + */ +static inline int ext4_journal_destroy(struct ext4_sb_info *sbi, journal_t *journal) +{ + int err = 0; + + /* + * At this point only two things can be operating on the journal. + * JBD2 thread performing transaction commit and s_sb_upd_work + * issuing sb update through the journal. Once we set + * EXT4_JOURNAL_DESTROY, new ext4_handle_error() calls will not + * queue s_sb_upd_work and ext4_force_commit() makes sure any + * ext4_handle_error() calls from the running transaction commit are + * finished. Hence no new s_sb_upd_work can be queued after we + * flush it here. + */ + ext4_set_mount_flag(sbi->s_sb, EXT4_MF_JOURNAL_DESTROY); + + ext4_force_commit(sbi->s_sb); + flush_work(&sbi->s_sb_upd_work); + + err = jbd2_journal_destroy(journal); + sbi->s_journal = NULL; + + return err; +} + #endif /* _EXT4_JBD2_H */ diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index a26d4664e4ba5..e67d3cc883cc5 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -159,7 +159,6 @@ int ext4_find_inline_data_nolock(struct inode *inode) (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + le32_to_cpu(is.s.here->e_value_size); - ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } out: brelse(is.iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 825f7a015422e..a93f3765edc5c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4683,8 +4683,18 @@ static inline int ext4_iget_extra_inode(struct inode *inode, if (EXT4_INODE_HAS_XATTR_SPACE(inode) && *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { + int err; + + err = xattr_check_inode(inode, IHDR(inode, raw_inode), + ITAIL(inode, raw_inode)); + if (err) + return err; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); - return ext4_find_inline_data_nolock(inode); + err = ext4_find_inline_data_nolock(inode); + if (!err && ext4_has_inline_data(inode)) + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + return err; } else EXT4_I(inode)->i_inline_off = 0; return 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ae45d4ffcb65..fd60c3142e1d6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -668,10 +668,14 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, * In case the fs should keep running, we need to writeout * superblock through the journal. Due to lock ordering * constraints, it may not be safe to do it right here so we - * defer superblock flushing to a workqueue. + * defer superblock flushing to a workqueue. We just need to be + * careful when the journal is already shutting down. If we get + * here in that case, just update the sb directly as the last + * transaction won't commit anyway. */ - if (continue_fs && journal) - schedule_work(&EXT4_SB(sb)->s_error_work); + if (continue_fs && journal && + !ext4_test_mount_flag(sb, EXT4_MF_JOURNAL_DESTROY)) + schedule_work(&EXT4_SB(sb)->s_sb_upd_work); else ext4_commit_super(sb); } @@ -698,10 +702,10 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, sb->s_flags |= SB_RDONLY; } -static void flush_stashed_error_work(struct work_struct *work) +static void update_super_work(struct work_struct *work) { struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, - s_error_work); + s_sb_upd_work); journal_t *journal = sbi->s_journal; handle_t *handle; @@ -1011,7 +1015,7 @@ __acquires(bitlock) if (!bdev_read_only(sb->s_bdev)) { save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - schedule_work(&EXT4_SB(sb)->s_error_work); + schedule_work(&EXT4_SB(sb)->s_sb_upd_work); } return; } @@ -1189,10 +1193,10 @@ static void ext4_put_super(struct super_block *sb) * Unregister sysfs before destroying jbd2 journal. * Since we could still access attr_journal_task attribute via sysfs * path which could have sbi->s_journal->j_task as NULL - * Unregister sysfs before flush sbi->s_error_work. + * Unregister sysfs before flush sbi->s_sb_upd_work. * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If * read metadata verify failed then will queue error work. - * flush_stashed_error_work will call start_this_handle may trigger + * update_super_work will call start_this_handle may trigger * BUG_ON. */ ext4_unregister_sysfs(sb); @@ -1203,18 +1207,17 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quota_off_umount(sb); - flush_work(&sbi->s_error_work); destroy_workqueue(sbi->rsv_conversion_wq); ext4_release_orphan_info(sb); if (sbi->s_journal) { aborted = is_journal_aborted(sbi->s_journal); - err = jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; + err = ext4_journal_destroy(sbi, sbi->s_journal); if ((err < 0) && !aborted) { ext4_abort(sb, -err, "Couldn't clean up the journal"); } - } + } else + flush_work(&sbi->s_sb_upd_work); ext4_es_unregister_shrinker(sbi); del_timer_sync(&sbi->s_err_report); @@ -4892,10 +4895,7 @@ static int ext4_load_and_init_journal(struct super_block *sb, return 0; out: - /* flush s_error_work before journal destroy. */ - flush_work(&sbi->s_error_work); - jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; + ext4_journal_destroy(sbi, sbi->s_journal); return -EINVAL; } @@ -5241,7 +5241,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) timer_setup(&sbi->s_err_report, print_daily_error_info, 0); spin_lock_init(&sbi->s_error_lock); - INIT_WORK(&sbi->s_error_work, flush_stashed_error_work); + INIT_WORK(&sbi->s_sb_upd_work, update_super_work); /* Register extent status tree shrinker */ if (ext4_es_register_shrinker(sbi)) @@ -5601,16 +5601,13 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_ea_block_cache = NULL; if (sbi->s_journal) { - /* flush s_error_work before journal destroy. */ - flush_work(&sbi->s_error_work); - jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; + ext4_journal_destroy(sbi, sbi->s_journal); } failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: - /* flush s_error_work before sbi destroy */ - flush_work(&sbi->s_error_work); + /* flush s_sb_upd_work before sbi destroy */ + flush_work(&sbi->s_sb_upd_work); del_timer_sync(&sbi->s_err_report); ext4_stop_mmpd(sbi); ext4_group_desc_free(sbi); @@ -5858,7 +5855,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, return journal; out_journal: - jbd2_journal_destroy(journal); + ext4_journal_destroy(EXT4_SB(sb), journal); out_bdev: ext4_blkdev_put(bdev); return NULL; @@ -5958,8 +5955,7 @@ static int ext4_load_journal(struct super_block *sb, EXT4_SB(sb)->s_journal = journal; err = ext4_clear_journal_err(sb, es); if (err) { - EXT4_SB(sb)->s_journal = NULL; - jbd2_journal_destroy(journal); + ext4_journal_destroy(EXT4_SB(sb), journal); return err; } @@ -5974,7 +5970,7 @@ static int ext4_load_journal(struct super_block *sb, return 0; err_out: - jbd2_journal_destroy(journal); + ext4_journal_destroy(EXT4_SB(sb), journal); return err; } @@ -6426,7 +6422,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) } /* Flush outstanding errors before changing fs state */ - flush_work(&sbi->s_error_work); + flush_work(&sbi->s_sb_upd_work); if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) { if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 193d963a262b5..c4e93b040721c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -182,27 +182,73 @@ ext4_xattr_handler(int name_index) } static int -ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, - void *value_start) +check_xattrs(struct inode *inode, struct buffer_head *bh, + struct ext4_xattr_entry *entry, void *end, void *value_start, + const char *function, unsigned int line) { struct ext4_xattr_entry *e = entry; + int err = -EFSCORRUPTED; + char *err_str; + + if (bh) { + if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || + BHDR(bh)->h_blocks != cpu_to_le32(1)) { + err_str = "invalid header"; + goto errout; + } + if (buffer_verified(bh)) + return 0; + if (!ext4_xattr_block_csum_verify(inode, bh)) { + err = -EFSBADCRC; + err_str = "invalid checksum"; + goto errout; + } + } else { + struct ext4_xattr_ibody_header *header = value_start; + + header -= 1; + if (end - (void *)header < sizeof(*header) + sizeof(u32)) { + err_str = "in-inode xattr block too small"; + goto errout; + } + if (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { + err_str = "bad magic number in in-inode xattr"; + goto errout; + } + } /* Find the end of the names list */ while (!IS_LAST_ENTRY(e)) { struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); - if ((void *)next >= end) - return -EFSCORRUPTED; - if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) - return -EFSCORRUPTED; + if ((void *)next >= end) { + err_str = "e_name out of bounds"; + goto errout; + } + if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) { + err_str = "bad e_name length"; + goto errout; + } e = next; } /* Check the values */ while (!IS_LAST_ENTRY(entry)) { u32 size = le32_to_cpu(entry->e_value_size); + unsigned long ea_ino = le32_to_cpu(entry->e_value_inum); - if (size > EXT4_XATTR_SIZE_MAX) - return -EFSCORRUPTED; + if (!ext4_has_feature_ea_inode(inode->i_sb) && ea_ino) { + err_str = "ea_inode specified without ea_inode feature enabled"; + goto errout; + } + if (ea_ino && ((ea_ino == EXT4_ROOT_INO) || + !ext4_valid_inum(inode->i_sb, ea_ino))) { + err_str = "invalid ea_ino"; + goto errout; + } + if (size > EXT4_XATTR_SIZE_MAX) { + err_str = "e_value size too large"; + goto errout; + } if (size != 0 && entry->e_value_inum == 0) { u16 offs = le16_to_cpu(entry->e_value_offs); @@ -214,71 +260,56 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, * the padded and unpadded sizes, since the size may * overflow to 0 when adding padding. */ - if (offs > end - value_start) - return -EFSCORRUPTED; + if (offs > end - value_start) { + err_str = "e_value out of bounds"; + goto errout; + } value = value_start + offs; if (value < (void *)e + sizeof(u32) || size > end - value || - EXT4_XATTR_SIZE(size) > end - value) - return -EFSCORRUPTED; + EXT4_XATTR_SIZE(size) > end - value) { + err_str = "overlapping e_value "; + goto errout; + } } entry = EXT4_XATTR_NEXT(entry); } - + if (bh) + set_buffer_verified(bh); return 0; + +errout: + if (bh) + __ext4_error_inode(inode, function, line, 0, -err, + "corrupted xattr block %llu: %s", + (unsigned long long) bh->b_blocknr, + err_str); + else + __ext4_error_inode(inode, function, line, 0, -err, + "corrupted in-inode xattr: %s", err_str); + return err; } static inline int __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, const char *function, unsigned int line) { - int error = -EFSCORRUPTED; - - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || - BHDR(bh)->h_blocks != cpu_to_le32(1)) - goto errout; - if (buffer_verified(bh)) - return 0; - - error = -EFSBADCRC; - if (!ext4_xattr_block_csum_verify(inode, bh)) - goto errout; - error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size, - bh->b_data); -errout: - if (error) - __ext4_error_inode(inode, function, line, 0, -error, - "corrupted xattr block %llu", - (unsigned long long) bh->b_blocknr); - else - set_buffer_verified(bh); - return error; + return check_xattrs(inode, bh, BFIRST(bh), bh->b_data + bh->b_size, + bh->b_data, function, line); } #define ext4_xattr_check_block(inode, bh) \ __ext4_xattr_check_block((inode), (bh), __func__, __LINE__) -static int +int __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, void *end, const char *function, unsigned int line) { - int error = -EFSCORRUPTED; - - if (end - (void *)header < sizeof(*header) + sizeof(u32) || - (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC))) - goto errout; - error = ext4_xattr_check_entries(IFIRST(header), end, IFIRST(header)); -errout: - if (error) - __ext4_error_inode(inode, function, line, 0, -error, - "corrupted in-inode xattr"); - return error; + return check_xattrs(inode, NULL, IFIRST(header), end, IFIRST(header), + function, line); } -#define xattr_check_inode(inode, header, end) \ - __xattr_check_inode((inode), (header), (end), __func__, __LINE__) - static int xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry, void *end, int name_index, const char *name, int sorted) @@ -604,10 +635,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, return error; raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); - end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = xattr_check_inode(inode, header, end); - if (error) - goto cleanup; + end = ITAIL(inode, raw_inode); entry = IFIRST(header); error = xattr_find_entry(inode, &entry, end, name_index, name, 0); if (error) @@ -739,7 +767,6 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct ext4_xattr_ibody_header *header; struct ext4_inode *raw_inode; struct ext4_iloc iloc; - void *end; int error; if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) @@ -749,14 +776,9 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) return error; raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); - end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = xattr_check_inode(inode, header, end); - if (error) - goto cleanup; error = ext4_xattr_list_entries(dentry, IFIRST(header), buffer, buffer_size); -cleanup: brelse(iloc.bh); return error; } @@ -824,7 +846,6 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) struct ext4_xattr_ibody_header *header; struct ext4_xattr_entry *entry; qsize_t ea_inode_refs = 0; - void *end; int ret; lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem); @@ -835,10 +856,6 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) goto out; raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); - end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - ret = xattr_check_inode(inode, header, end); - if (ret) - goto out; for (entry = IFIRST(header); !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) @@ -2197,11 +2214,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, header = IHDR(inode, raw_inode); is->s.base = is->s.first = IFIRST(header); is->s.here = is->s.first; - is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + is->s.end = ITAIL(inode, raw_inode); if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { - error = xattr_check_inode(inode, header, is->s.end); - if (error) - return error; /* Find the named attribute. */ error = xattr_find_entry(inode, &is->s.here, is->s.end, i->name_index, i->name, 0); @@ -2714,14 +2728,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, */ base = IFIRST(header); - end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + end = ITAIL(inode, raw_inode); min_offs = end - base; total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32); - error = xattr_check_inode(inode, header, end); - if (error) - goto cleanup; - ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino); if (ifree >= isize_diff) goto shift; @@ -2782,6 +2792,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; + if (ext4_has_inline_data(inode)) + error = ext4_find_inline_data_nolock(inode); + cleanup: if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 824faf0b15a87..17c0d6bb230b9 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -68,6 +68,9 @@ struct ext4_xattr_entry { ((void *)raw_inode + \ EXT4_GOOD_OLD_INODE_SIZE + \ EXT4_I(inode)->i_extra_isize)) +#define ITAIL(inode, raw_inode) \ + ((void *)(raw_inode) + \ + EXT4_SB((inode)->i_sb)->s_inode_size) #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) /* @@ -207,6 +210,13 @@ extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, extern struct mb_cache *ext4_xattr_create_cache(void); extern void ext4_xattr_destroy_cache(struct mb_cache *); +extern int +__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, + void *end, const char *function, unsigned int line); + +#define xattr_check_inode(inode, header, end) \ + __xattr_check_inode((inode), (header), (end), __func__, __LINE__) + #ifdef CONFIG_EXT4_FS_SECURITY extern int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, const struct qstr *qstr);