From adfe0937bf10b6e4ca737f5621f48dee16ecbefc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 Oct 2014 14:44:25 -0700 Subject: [PATCH 001/321] f2fs: initial build for v3.10 This patch is to build f2fs for v3.10. Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 171 +++++++++++++++++++++++++--- fs/f2fs/acl.h | 7 +- fs/f2fs/data.c | 28 ++--- fs/f2fs/dir.c | 72 ++++++++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 34 ++++-- fs/f2fs/inode.c | 19 ++-- fs/f2fs/namei.c | 218 +----------------------------------- fs/f2fs/node.c | 3 +- fs/f2fs/recovery.c | 6 +- fs/f2fs/segment.c | 47 ++++++++ fs/f2fs/xattr.c | 9 +- fs/f2fs/xattr.h | 2 + include/trace/events/f2fs.h | 11 +- 14 files changed, 326 insertions(+), 303 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 83b9b5a8..81890804 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -164,11 +164,19 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) struct posix_acl *f2fs_get_acl(struct inode *inode, int type) { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; void *value = NULL; struct posix_acl *acl; int retval; + if (!test_opt(sbi, POSIX_ACL)) + return NULL; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; @@ -194,15 +202,21 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } -static int __f2fs_set_acl(struct inode *inode, int type, +static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); int name_index; void *value = NULL; size_t size = 0; int error; + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; @@ -244,31 +258,154 @@ static int __f2fs_set_acl(struct inode *inode, int type, return error; } -int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) { - return __f2fs_set_acl(inode, type, acl, NULL); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct posix_acl *acl = NULL; + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(sbi, POSIX_ACL)) { + acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current_umask(); + } + + if (!test_opt(sbi, POSIX_ACL) || !acl) + goto cleanup; + + if (S_ISDIR(inode->i_mode)) { + error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl, ipage); + if (error) + goto cleanup; + } + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (error < 0) + return error; + if (error > 0) + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); +cleanup: + posix_acl_release(acl); + return error; } -int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) +int f2fs_acl_chmod(struct inode *inode) { - struct posix_acl *default_acl, *acl; - int error = 0; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct posix_acl *acl; + int error; + umode_t mode = get_inode_mode(inode); + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(mode)) + return -EOPNOTSUPP; - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); + acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + error = posix_acl_chmod(&acl, GFP_KERNEL, mode); if (error) return error; - if (default_acl) { - error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, - ipage); - posix_acl_release(default_acl); - } - if (acl) { - if (error) - error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, - ipage); - posix_acl_release(acl); + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, NULL); + posix_acl_release(acl); + return error; +} + +static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + const char *xname = POSIX_ACL_XATTR_DEFAULT; + size_t size; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + + if (type == ACL_TYPE_ACCESS) + xname = POSIX_ACL_XATTR_ACCESS; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct posix_acl *acl; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else { + acl = NULL; } + error = f2fs_set_acl(inode, type, acl, NULL); + +release_and_out: + posix_acl_release(acl); return error; } + +const struct xattr_handler f2fs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; + +const struct xattr_handler f2fs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index e0864651..49633131 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -37,13 +37,18 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL extern struct posix_acl *f2fs_get_acl(struct inode *, int); -extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int f2fs_acl_chmod(struct inode *); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL #define f2fs_set_acl NULL +static inline int f2fs_acl_chmod(struct inode *inode) +{ + return 0; +} + static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *page) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e58c4cc..b60e55f8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -85,7 +85,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, bio = bio_alloc(GFP_NOIO, npages); bio->bi_bdev = sbi->sb->s_bdev; - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); + bio->bi_sector = SECTOR_FROM_BLOCK(blk_addr); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; bio->bi_private = sbi; @@ -940,7 +940,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, inode->i_size); + truncate_pagecache(inode, 0, inode->i_size); truncate_blocks(inode, inode->i_size, true); } } @@ -1068,9 +1068,10 @@ static int f2fs_write_end(struct file *file, } static int check_direct_IO(struct inode *inode, int rw, - struct iov_iter *iter, loff_t offset) + const struct iovec *iov, loff_t offset, unsigned long nr_segs) { unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; + int i; if (rw == READ) return 0; @@ -1078,31 +1079,33 @@ static int check_direct_IO(struct inode *inode, int rw, if (offset & blocksize_mask) return -EINVAL; - if (iov_iter_alignment(iter) & blocksize_mask) - return -EINVAL; - + for (i = 0; i < nr_segs; i++) + if (iov[i].iov_len & blocksize_mask) + return -EINVAL; return 0; } static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - size_t count = iov_iter_count(iter); + size_t count = iov_length(iov, nr_segs); int err; /* Let buffer I/O handle the inline data case. */ if (f2fs_has_inline_data(inode)) return 0; - if (check_direct_IO(inode, rw, iter, offset)) + if (check_direct_IO(inode, rw, iov, offset, nr_segs)) return 0; trace_f2fs_direct_IO_enter(inode, offset, count, rw); - err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); + err = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + get_data_block); if (err < 0 && (rw & WRITE)) f2fs_write_failed(mapping, offset + count); @@ -1111,12 +1114,11 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } -static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, - unsigned int length) +static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) { struct inode *inode = page->mapping->host; - if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) + if (offset % PAGE_CACHE_SIZE) return; if (PageDirty(page)) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b54f8714..0bd16f58 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -9,12 +9,17 @@ * published by the Free Software Foundation. */ #include +#include #include #include "f2fs.h" #include "node.h" #include "acl.h" #include "xattr.h" +#ifndef LOOKUP_NOCASE +#define LOOKUP_NOCASE 0 +#endif + static unsigned long dir_blocks(struct inode *inode) { return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) @@ -91,7 +96,8 @@ static bool early_match_name(size_t namelen, f2fs_hash_t namehash, static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, struct qstr *name, int *max_slots, - f2fs_hash_t namehash, struct page **res_page) + f2fs_hash_t namehash, struct page **res_page, + unsigned int flags) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; @@ -109,7 +115,15 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, continue; } de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de)) { + + if (flags & LOOKUP_NOCASE) { + if ((le16_to_cpu(de->name_len) == name->len) && + !strncasecmp(dentry_blk->filename[bit_pos], + name->name, name->len)) { + *res_page = dentry_page; + goto found; + } + } else if (early_match_name(name->len, namehash, de)) { if (!memcmp(dentry_blk->filename[bit_pos], name->name, name->len)) { @@ -141,7 +155,8 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, struct qstr *name, - f2fs_hash_t namehash, struct page **res_page) + f2fs_hash_t namehash, struct page **res_page, + unsigned int flags) { int s = GET_DENTRY_SLOTS(name->len); unsigned int nbucket, nblock; @@ -169,7 +184,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } de = find_in_block(dentry_page, name, &max_slots, - namehash, res_page); + namehash, res_page, flags); if (de) break; @@ -192,8 +207,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, * and the entry itself. Page is returned mapped and unlocked. * Entry is guaranteed to be valid. */ -struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - struct qstr *child, struct page **res_page) +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct qstr *child, + struct page **res_page, unsigned int flags) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; @@ -210,7 +225,8 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, max_depth = F2FS_I(dir)->i_current_depth; for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, child, name_hash, res_page); + de = find_in_level(dir, level, child, name_hash, + res_page, flags); if (de) break; } @@ -244,7 +260,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) struct f2fs_dir_entry *de; struct page *page; - de = f2fs_find_entry(dir, qstr, &page); + de = f2fs_find_entry(dir, qstr, &page, 0); if (de) { res = le32_to_cpu(de->ino); kunmap(page); @@ -659,19 +675,25 @@ bool f2fs_empty_dir(struct inode *dir) return true; } -static int f2fs_readdir(struct file *file, struct dir_context *ctx) +static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) { + unsigned long pos = file->f_pos; + unsigned char *types = NULL; + unsigned int bit_pos = 0, start_bit_pos = 0; + int over = 0; struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); - unsigned int bit_pos = 0; struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; - unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); + unsigned int n = 0; unsigned char d_type = DT_UNKNOWN; + int slots; - bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); + types = f2fs_filetype_table; + bit_pos = (pos % NR_DENTRY_IN_BLOCK); + n = (pos / NR_DENTRY_IN_BLOCK); /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) @@ -683,8 +705,10 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (IS_ERR(dentry_page)) continue; + start_bit_pos = bit_pos; dentry_blk = kmap(dentry_page); while (bit_pos < NR_DENTRY_IN_BLOCK) { + d_type = DT_UNKNOWN; bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, bit_pos); @@ -692,22 +716,24 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) break; de = &dentry_blk->dentry[bit_pos]; - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; - if (!dir_emit(ctx, + if (types && de->file_type < F2FS_FT_MAX) + d_type = types[de->file_type]; + + over = filldir(dirent, dentry_blk->filename[bit_pos], le16_to_cpu(de->name_len), - le32_to_cpu(de->ino), d_type)) + (n * NR_DENTRY_IN_BLOCK) + bit_pos, + le32_to_cpu(de->ino), d_type); + if (over) { + file->f_pos += bit_pos - start_bit_pos; goto stop; - - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos; + } + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + bit_pos += slots; } bit_pos = 0; - ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); + file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; f2fs_put_page(dentry_page, 1); dentry_page = NULL; } @@ -723,7 +749,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) const struct file_operations f2fs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = f2fs_readdir, + .readdir = f2fs_readdir, .fsync = f2fs_sync_file, .unlocked_ioctl = f2fs_ioctl, }; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8171e80b..560a811e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1225,7 +1225,7 @@ struct dentry *f2fs_get_parent(struct dentry *child); * dir.c */ struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, - struct page **); + struct page **, unsigned int flags); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8e68bb64..acf07695 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -90,9 +90,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, static const struct vm_operations_struct f2fs_file_vm_ops = { .fault = filemap_fault, - .map_pages = filemap_map_pages, .page_mkwrite = f2fs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int get_parent_ino(struct inode *inode, nid_t *pino) @@ -279,6 +277,25 @@ static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, return false; } +static inline int unsigned_offsets(struct file *file) +{ + return file->f_mode & FMODE_UNSIGNED_OFFSET; +} + +static loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) +{ + if (offset < 0 && !unsigned_offsets(file)) + return -EINVAL; + if (offset > maxsize) + return -EINVAL; + + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + return offset; +} + static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; @@ -581,7 +598,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) __setattr_copy(inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(inode, get_inode_mode(inode)); + err = f2fs_acl_chmod(inode); if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { inode->i_mode = fi->i_acl_mode; clear_inode_flag(fi, FI_ACL_MODE); @@ -596,7 +613,6 @@ const struct inode_operations f2fs_file_inode_operations = { .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, - .set_acl = f2fs_set_acl, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -980,10 +996,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, - .read = new_sync_read, - .write = new_sync_write, - .read_iter = generic_file_read_iter, - .write_iter = generic_file_write_iter, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, .open = generic_file_open, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, @@ -993,5 +1009,5 @@ const struct file_operations f2fs_file_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = generic_file_splice_write, }; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0deead45..1c5dff29 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -12,7 +12,6 @@ #include #include #include -#include #include "f2fs.h" #include "node.h" @@ -22,20 +21,20 @@ void f2fs_set_inode_flags(struct inode *inode) { unsigned int flags = F2FS_I(inode)->i_flags; - unsigned int new_fl = 0; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | + S_NOATIME | S_DIRSYNC); if (flags & FS_SYNC_FL) - new_fl |= S_SYNC; + inode->i_flags |= S_SYNC; if (flags & FS_APPEND_FL) - new_fl |= S_APPEND; + inode->i_flags |= S_APPEND; if (flags & FS_IMMUTABLE_FL) - new_fl |= S_IMMUTABLE; + inode->i_flags |= S_IMMUTABLE; if (flags & FS_NOATIME_FL) - new_fl |= S_NOATIME; + inode->i_flags |= S_NOATIME; if (flags & FS_DIRSYNC_FL) - new_fl |= S_DIRSYNC; - set_mask_bits(&inode->i_flags, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); + inode->i_flags |= S_DIRSYNC; } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -274,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode) commit_inmem_pages(inode, true); trace_f2fs_evict_inode(inode); - truncate_inode_pages_final(&inode->i_data); + truncate_inode_pages(&inode->i_data, 0); if (inode->i_ino == F2FS_NODE_INO(sbi) || inode->i_ino == F2FS_META_INO(sbi)) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d2526e5..beda842e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -9,6 +9,7 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include @@ -184,7 +185,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - de = f2fs_find_entry(dir, &dentry->d_name, &page); + de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); if (de) { nid_t ino = le32_to_cpu(de->ino); kunmap(page); @@ -211,7 +212,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); f2fs_balance_fs(sbi); - de = f2fs_find_entry(dir, &dentry->d_name, &page); + de = f2fs_find_entry(dir, &dentry->d_name, &page, 0); if (!de) goto fail; @@ -361,7 +362,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_balance_fs(sbi); - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page, 0); if (!old_entry) goto out; @@ -380,7 +381,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, - &new_page); + &new_page, 0); if (!new_entry) goto out_dir; @@ -470,210 +471,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, return err; } -static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); - struct inode *old_inode = old_dentry->d_inode; - struct inode *new_inode = new_dentry->d_inode; - struct page *old_dir_page, *new_dir_page; - struct page *old_page, *new_page; - struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; - struct f2fs_dir_entry *old_entry, *new_entry; - int old_nlink = 0, new_nlink = 0; - int err = -ENOENT; - - f2fs_balance_fs(sbi); - - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_entry) - goto out; - - new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); - if (!new_entry) - goto out_old; - - /* prepare for updating ".." directory entry info later */ - if (old_dir != new_dir) { - if (S_ISDIR(old_inode->i_mode)) { - err = -EIO; - old_dir_entry = f2fs_parent_dir(old_inode, - &old_dir_page); - if (!old_dir_entry) - goto out_new; - } - - if (S_ISDIR(new_inode->i_mode)) { - err = -EIO; - new_dir_entry = f2fs_parent_dir(new_inode, - &new_dir_page); - if (!new_dir_entry) - goto out_old_dir; - } - } - - /* - * If cross rename between file and directory those are not - * in the same directory, we will inc nlink of file's parent - * later, so we should check upper boundary of its nlink. - */ - if ((!old_dir_entry || !new_dir_entry) && - old_dir_entry != new_dir_entry) { - old_nlink = old_dir_entry ? -1 : 1; - new_nlink = -old_nlink; - err = -EMLINK; - if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || - (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) - goto out_new_dir; - } - - f2fs_lock_op(sbi); - - err = update_dent_inode(old_inode, &new_dentry->d_name); - if (err) - goto out_unlock; - - err = update_dent_inode(new_inode, &old_dentry->d_name); - if (err) - goto out_undo; - - /* update ".." directory entry info of old dentry */ - if (old_dir_entry) - f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); - - /* update ".." directory entry info of new dentry */ - if (new_dir_entry) - f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir); - - /* update directory entry info of old dir inode */ - f2fs_set_link(old_dir, old_entry, old_page, new_inode); - - down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); - up_write(&F2FS_I(old_inode)->i_sem); - - update_inode_page(old_inode); - - old_dir->i_ctime = CURRENT_TIME; - if (old_nlink) { - down_write(&F2FS_I(old_dir)->i_sem); - if (old_nlink < 0) - drop_nlink(old_dir); - else - inc_nlink(old_dir); - up_write(&F2FS_I(old_dir)->i_sem); - } - mark_inode_dirty(old_dir); - update_inode_page(old_dir); - - /* update directory entry info of new dir inode */ - f2fs_set_link(new_dir, new_entry, new_page, old_inode); - - down_write(&F2FS_I(new_inode)->i_sem); - file_lost_pino(new_inode); - up_write(&F2FS_I(new_inode)->i_sem); - - update_inode_page(new_inode); - - new_dir->i_ctime = CURRENT_TIME; - if (new_nlink) { - down_write(&F2FS_I(new_dir)->i_sem); - if (new_nlink < 0) - drop_nlink(new_dir); - else - inc_nlink(new_dir); - up_write(&F2FS_I(new_dir)->i_sem); - } - mark_inode_dirty(new_dir); - update_inode_page(new_dir); - - f2fs_unlock_op(sbi); - return 0; -out_undo: - /* Still we may fail to recover name info of f2fs_inode here */ - update_dent_inode(old_inode, &old_dentry->d_name); -out_unlock: - f2fs_unlock_op(sbi); -out_new_dir: - if (new_dir_entry) { - kunmap(new_dir_page); - f2fs_put_page(new_dir_page, 0); - } -out_old_dir: - if (old_dir_entry) { - kunmap(old_dir_page); - f2fs_put_page(old_dir_page, 0); - } -out_new: - kunmap(new_page); - f2fs_put_page(new_page, 0); -out_old: - kunmap(old_page); - f2fs_put_page(old_page, 0); -out: - return err; -} - -static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) -{ - if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) - return -EINVAL; - - if (flags & RENAME_EXCHANGE) { - return f2fs_cross_rename(old_dir, old_dentry, - new_dir, new_dentry); - } - /* - * VFS has already handled the new dentry existence case, - * here, we just deal with "RENAME_NOREPLACE" as regular rename. - */ - return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry); -} - -static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct inode *inode; - int err; - - inode = f2fs_new_inode(dir, mode); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - inode->i_op = &f2fs_file_inode_operations; - inode->i_fop = &f2fs_file_operations; - inode->i_mapping->a_ops = &f2fs_dblock_aops; - - f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); - if (err) - goto out; - - err = f2fs_do_tmpfile(inode, dir); - if (err) - goto release_out; - - /* - * add this non-linked tmpfile to orphan list, in this way we could - * remove all unused data of tmpfile after abnormal power-off. - */ - add_orphan_inode(sbi, inode->i_ino); - f2fs_unlock_op(sbi); - - alloc_nid_done(sbi, inode->i_ino); - d_tmpfile(dentry, inode); - unlock_new_inode(inode); - return 0; - -release_out: - release_orphan_inode(sbi); -out: - handle_failed_inode(inode); - return err; -} - const struct inode_operations f2fs_dir_inode_operations = { .create = f2fs_create, .lookup = f2fs_lookup, @@ -683,12 +480,10 @@ const struct inode_operations f2fs_dir_inode_operations = { .mkdir = f2fs_mkdir, .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, - .rename2 = f2fs_rename2, - .tmpfile = f2fs_tmpfile, + .rename = f2fs_rename, .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, - .set_acl = f2fs_set_acl, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -715,7 +510,6 @@ const struct inode_operations f2fs_special_inode_operations = { .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, - .set_acl = f2fs_set_acl, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 44b8afef..e73ce18d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1355,8 +1355,7 @@ static int f2fs_set_node_page_dirty(struct page *page) return 0; } -static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, - unsigned int length) +static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) { struct inode *inode = page->mapping->host; if (PageDirty(page)) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ebd01322..680dd813 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -92,7 +92,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out_err; } retry: - de = f2fs_find_entry(dir, &name, &page); + de = f2fs_find_entry(dir, &name, &page, 0); if (de && inode->i_ino == le32_to_cpu(de->ino)) { clear_inode_flag(F2FS_I(inode), FI_INC_LINK); goto out_unmap_put; @@ -526,8 +526,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); if (err) { - truncate_inode_pages_final(NODE_MAPPING(sbi)); - truncate_inode_pages_final(META_MAPPING(sbi)); + truncate_inode_pages(NODE_MAPPING(sbi), 0); + truncate_inode_pages(META_MAPPING(sbi), 0); } sbi->por_doing = false; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 923cb76f..d6e1a930 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -28,6 +28,53 @@ static struct kmem_cache *discard_entry_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; +/** + * Copied from latest lib/llist.c + * llist_for_each_entry_safe - iterate over some deleted entries of + * lock-less list of given type + * safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @node: the first entry of deleted list entries. + * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry_safe(pos, n, node, member) \ + for (pos = llist_entry((node), typeof(*pos), member); \ + &pos->member != NULL && \ + (n = llist_entry(pos->member.next, typeof(*n), member), true); \ + pos = n) + +/** + * Copied from latest lib/llist.c + * llist_reverse_order - reverse order of a llist chain + * @head: first item of the list to be reversed + * + * Reverse the order of a chain of llist entries and return the + * new first entry. + */ +struct llist_node *llist_reverse_order(struct llist_node *head) +{ + struct llist_node *new_head = NULL; + + while (head) { + struct llist_node *tmp = head; + head = head->next; + tmp->next = new_head; + new_head = tmp; + } + + return new_head; +} + /* * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since * MSB and LSB are reversed in a byte by f2fs_set_bit. diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index deca8728..55be5934 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "f2fs.h" #include "xattr.h" @@ -215,8 +214,8 @@ const struct xattr_handler f2fs_xattr_security_handler = { static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, #ifdef CONFIG_F2FS_FS_POSIX_ACL - [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, - [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler, + [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, + [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, #endif [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, #ifdef CONFIG_F2FS_FS_SECURITY @@ -228,8 +227,8 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = { const struct xattr_handler *f2fs_xattr_handlers[] = { &f2fs_xattr_user_handler, #ifdef CONFIG_F2FS_FS_POSIX_ACL - &posix_acl_access_xattr_handler, - &posix_acl_default_xattr_handler, + &f2fs_xattr_acl_access_handler, + &f2fs_xattr_acl_default_handler, #endif &f2fs_xattr_trusted_handler, #ifdef CONFIG_F2FS_FS_SECURITY diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 34ab7dbc..9b18c07a 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -108,6 +108,8 @@ struct f2fs_xattr_entry { #ifdef CONFIG_F2FS_FS_XATTR extern const struct xattr_handler f2fs_xattr_user_handler; extern const struct xattr_handler f2fs_xattr_trusted_handler; +extern const struct xattr_handler f2fs_xattr_acl_access_handler; +extern const struct xattr_handler f2fs_xattr_acl_default_handler; extern const struct xattr_handler f2fs_xattr_advise_handler; extern const struct xattr_handler f2fs_xattr_security_handler; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index bbc4de9b..e86c1eaf 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -698,8 +698,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, __entry->dev = sb->s_dev; __entry->rw = rw; __entry->type = type; - __entry->sector = bio->bi_iter.bi_sector; - __entry->size = bio->bi_iter.bi_size; + __entry->sector = bio->bi_sector; + __entry->size = bio->bi_size; ), TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u", @@ -874,7 +874,6 @@ TRACE_EVENT(f2fs_writepages, __field(char, tagged_writepages) __field(char, for_reclaim) __field(char, range_cyclic) - __field(char, for_sync) ), TP_fast_assign( @@ -893,12 +892,11 @@ TRACE_EVENT(f2fs_writepages, __entry->tagged_writepages = wbc->tagged_writepages; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; - __entry->for_sync = wbc->for_sync; ), TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, " "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, " - "kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u", + "kupdate %u background %u tagged %u reclaim %u cyclic %u", show_dev_ino(__entry), show_block_type(__entry->type), show_file_type(__entry->dir), @@ -912,8 +910,7 @@ TRACE_EVENT(f2fs_writepages, __entry->for_background, __entry->tagged_writepages, __entry->for_reclaim, - __entry->range_cyclic, - __entry->for_sync) + __entry->range_cyclic) ); TRACE_EVENT(f2fs_submit_page_mbio, From 6121b6bd27cca185c0de58887e8cbd7bc7dd18bd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Oct 2014 13:39:06 -0700 Subject: [PATCH 002/321] f2fs: invalidate inmemory page If user truncates file's data, we should truncate inmemory pages too. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 16 ++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b60e55f8..215fa336 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1121,6 +1121,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) if (offset % PAGE_CACHE_SIZE) return; + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + invalidate_inmem_page(inode, page); + if (PageDirty(page)) inode_dec_dirty_pages(inode); ClearPagePrivate(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 560a811e..5589314d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1296,6 +1296,7 @@ void destroy_node_manager_caches(void); * segment.c */ void register_inmem_page(struct inode *, struct page *); +void invalidate_inmem_page(struct inode *, struct page *); void commit_inmem_pages(struct inode *, bool); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d6e1a930..831d74d9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -239,6 +239,22 @@ void register_inmem_page(struct inode *inode, struct page *page) mutex_unlock(&fi->inmem_lock); } +void invalidate_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *cur; + + mutex_lock(&fi->inmem_lock); + cur = radix_tree_lookup(&fi->inmem_root, page->index); + if (cur) { + radix_tree_delete(&fi->inmem_root, cur->page->index); + f2fs_put_page(cur->page, 0); + list_del(&cur->list); + kmem_cache_free(inmem_entry_slab, cur); + } + mutex_unlock(&fi->inmem_lock); +} + void commit_inmem_pages(struct inode *inode, bool abort) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); From 029b8b3b325ee3b34ac73a4fabcd54c0b4dc1566 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Oct 2014 13:19:53 -0700 Subject: [PATCH 003/321] f2fs: do not make dirty any inmemory pages This patch let inmemory pages be clean all the time. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++++---- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 14 +++++++++++++- fs/f2fs/super.c | 1 + 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 215fa336..639c0430 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1052,10 +1052,7 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) - register_inmem_page(inode, page); - else - set_page_dirty(page); + set_page_dirty(page); if (pos + copied > i_size_read(inode)) { i_size_write(inode, pos + copied); @@ -1143,6 +1140,12 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); SetPageUptodate(page); + + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { + register_inmem_page(inode, page); + return 1; + } + mark_inode_dirty(inode); if (!PageDirty(page)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5589314d..ae7dbc0c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -269,6 +269,7 @@ struct f2fs_inode_info { struct extent_info ext; /* in-memory extent cache entry */ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 831d74d9..ce929b75 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -225,7 +225,8 @@ void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; - + int err; +retry: new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ @@ -234,6 +235,16 @@ void register_inmem_page(struct inode *inode, struct page *page) /* increase reference count with clean state */ mutex_lock(&fi->inmem_lock); + err = radix_tree_insert(&fi->inmem_root, page->index, new); + if (err == -EEXIST) { + mutex_unlock(&fi->inmem_lock); + kmem_cache_free(inmem_entry_slab, new); + return; + } else if (err) { + mutex_unlock(&fi->inmem_lock); + kmem_cache_free(inmem_entry_slab, new); + goto retry; + } get_page(page); list_add_tail(&new->list, &fi->inmem_pages); mutex_unlock(&fi->inmem_lock); @@ -279,6 +290,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) do_write_data_page(cur->page, &fio); submit_bio = true; } + radix_tree_delete(&fi->inmem_root, cur->page->index); f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41d6f700..76b14c8c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -373,6 +373,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_advise = 0; rwlock_init(&fi->ext.ext_lock); init_rwsem(&fi->i_sem); + INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); From 8214b3c224917716153e17a8f053c1757b839dd1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Oct 2014 20:33:55 -0700 Subject: [PATCH 004/321] f2fs: avoid to allocate when inline_data was written The sceanrio is like this. inline_data i_size page write_begin/vm_page_mkwrite X 30 dirty_page X 30 write to #4096 position X 30 get_dnode_of_data wait for get_dnode_of_data O 30 write inline_data O 30 get_dnode_of_data O 30 reserve data block .. In this case, we have #0 = NEW_ADDR and inline_data as well. We should not allow this condition for further access. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 32 +++++++++++++++++++++++--------- fs/f2fs/file.c | 26 ++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 639c0430..71a29744 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -257,9 +257,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; - /* if inode_page exists, index should be zero */ - f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); - err = get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; @@ -951,7 +948,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; + struct page *page, *ipage; pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; @@ -979,13 +976,26 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, goto inline_data; f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, index); - f2fs_unlock_op(sbi); - if (err) { + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + goto unlock_fail; + + if (f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + f2fs_unlock_op(sbi); f2fs_put_page(page, 0); - goto fail; + goto repeat; } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + err = f2fs_reserve_block(&dn, index); + if (err) + goto unlock_fail; + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + inline_data: lock_page(page); if (unlikely(page->mapping != mapping)) { @@ -1038,6 +1048,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, SetPageUptodate(page); clear_cold_data(page); return 0; + +unlock_fail: + f2fs_unlock_op(sbi); + f2fs_put_page(page, 0); fail: f2fs_write_failed(mapping, pos + len); return err; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index acf07695..f92de7fc 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,12 +35,13 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; + struct page *ipage; int err; f2fs_balance_fs(sbi); sb_start_pagefault(inode->i_sb); - +retry: /* force to convert with normal data indices */ err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); if (err) @@ -48,11 +49,28 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, /* block allocation */ f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + f2fs_unlock_op(sbi); + goto out; + } + + if (f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + f2fs_unlock_op(sbi); + goto retry; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, page->index); - f2fs_unlock_op(sbi); - if (err) + if (err) { + f2fs_unlock_op(sbi); goto out; + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); file_update_time(vma->vm_file); lock_page(page); From e93879183d68573dd3114dc2f833eab023642f03 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Oct 2014 17:57:29 -0700 Subject: [PATCH 005/321] f2fs: use highmem for directory pages This patch fixes to use highmem for directory pages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- include/linux/f2fs_fs.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1c5dff29..c8b71ec7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -155,7 +155,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 860313a3..6d7381b4 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -33,7 +33,8 @@ #define F2FS_META_INO(sbi) (sbi->meta_ino_num) /* This flag is used by node and meta inodes, and by recovery */ -#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) +#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) /* * For further optimization on multi-head logs, on-disk layout supports maximum From 49132f73fe0a507bb5210f627335246cc5f76a0f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Oct 2014 10:24:34 -0700 Subject: [PATCH 006/321] f2fs: fix race conditon on truncation with inline_data Let's consider the following scenario. blkaddr[0] inline_data i_size i_blocks writepage truncate NEW X 4096 2 dirty page #0 NEW X 0 change i_size NEW X 0 2 f2fs_write_inline_data NEW X 0 2 get_dnode_of_data NEW X 0 2 truncate_data_blocks_range NULL O 0 1 memcpy(inline_data) NULL O 0 1 f2fs_put_dnode NULL O 0 1 f2fs_truncate NULL O 0 1 get_dnode_of_data NULL O 0 1 *invalid block addr* This patch adds checking inline_data flag during f2fs_truncate not to refer corrupted block indices. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f92de7fc..1079fb86 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -508,6 +508,12 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) return err; } + /* writepage can convert inline_data under get_donde_of_data */ + if (f2fs_has_inline_data(inode)) { + f2fs_put_dnode(&dn); + goto done; + } + count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); count -= dn.ofs_in_node; From 9130da9f35966b86602e7068a894b16dc3f42936 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Oct 2014 10:16:54 -0700 Subject: [PATCH 007/321] f2fs: should truncate any allocated block for inline_data write When trying to write inline_data, we should truncate any data block allocated and pointed by the inode block. We should consider the data index is not 0. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 88036fd7..e3abcfb5 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -166,6 +166,14 @@ int f2fs_write_inline_data(struct inode *inode, return err; ipage = dn.inode_page; + /* Release any data block if it is allocated */ + if (!f2fs_has_inline_data(inode)) { + int count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + truncate_data_blocks_range(&dn, count); + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + stat_inc_inline_inode(inode); + } + f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); @@ -174,13 +182,6 @@ int f2fs_write_inline_data(struct inode *inode, memcpy(dst_addr, src_addr, size); kunmap(page); - /* Release the first data block if it is allocated */ - if (!f2fs_has_inline_data(inode)) { - truncate_data_blocks_range(&dn, 1); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_inc_inline_inode(inode); - } - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); sync_inode_page(&dn); f2fs_put_dnode(&dn); From 0a58ee599a75ce5bc242d66d666ce18f313eff27 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Oct 2014 11:43:30 -0700 Subject: [PATCH 008/321] f2fs: avoid build warning This patch removes build warning. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ce929b75..fdb0d3f6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1762,7 +1762,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * #2, flush sit entries to sit page. */ list_for_each_entry_safe(ses, tmp, head, set_list) { - struct page *page; + struct page *page = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, From caec5c10a2f91a17aeab22486b73f5860593a4d8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 19 Oct 2014 01:43:23 -0700 Subject: [PATCH 009/321] f2fs: fix to call f2fs_unlock_op This patch fixes to call f2fs_unlock_op, which was missing before. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1079fb86..75dcc4c0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -511,7 +511,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) /* writepage can convert inline_data under get_donde_of_data */ if (f2fs_has_inline_data(inode)) { f2fs_put_dnode(&dn); - goto done; + goto unlock_done; } count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -527,6 +527,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +unlock_done: if (lock) f2fs_unlock_op(sbi); done: From f37bc46a1e99a246401410b5ad623344b6ccc8cc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Oct 2014 14:14:16 -0700 Subject: [PATCH 010/321] f2fs: avoid infinite loop at cp_error This patch avoids an infinite loop in sync_dirty_inode_page when -EIO was detected. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd10a031..ca514d59 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -731,6 +731,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) struct dir_inode_entry *entry; struct inode *inode; retry: + if (unlikely(f2fs_cp_error(sbi))) + return; + spin_lock(&sbi->dir_inode_lock); head = &sbi->dir_inode_list; From a3e1a5a8cc2d22a89997fdf108a59e4e9fe1bcd8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:15:19 +0800 Subject: [PATCH 011/321] f2fs: add infra struct and helper for inline dir This patch defines macro/inline dentry structure, and adds some helpers for inline dir infrastructure. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 21 +++++++++++++++++++-- include/linux/f2fs_fs.h | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ae7dbc0c..b0cd0d12 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -46,8 +46,9 @@ #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 #define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define F2FS_MOUNT_INLINE_DATA 0x00000100 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 -#define F2FS_MOUNT_NOBARRIER 0x00000400 +#define F2FS_MOUNT_INLINE_DENTRY 0x00000200 +#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 +#define F2FS_MOUNT_NOBARRIER 0x00000800 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -1058,6 +1059,7 @@ enum { FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ FI_APPEND_WRITE, /* inode has appended data */ FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ @@ -1104,6 +1106,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_XATTR); if (ri->i_inline & F2FS_INLINE_DATA) set_inode_flag(fi, FI_INLINE_DATA); + if (ri->i_inline & F2FS_INLINE_DENTRY) + set_inode_flag(fi, FI_INLINE_DENTRY); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1115,6 +1119,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_XATTR; if (is_inode_flag_set(fi, FI_INLINE_DATA)) ri->i_inline |= F2FS_INLINE_DATA; + if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) + ri->i_inline |= F2FS_INLINE_DENTRY; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1165,6 +1171,17 @@ static inline void *inline_data_addr(struct page *page) return (void *)&(ri->i_addr[1]); } +static inline int f2fs_has_inline_dentry(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); +} + +static inline void *inline_dentry_addr(struct page *page) +{ + struct f2fs_inode *ri = F2FS_INODE(page); + return (void *)&(ri->i_addr[1]); +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 6d7381b4..63f8303b 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -171,6 +171,7 @@ struct f2fs_extent { #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ +#define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) @@ -436,6 +437,24 @@ struct f2fs_dentry_block { __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; +/* for inline dir */ +#define NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + BITS_PER_BYTE + 1)) +#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \ + BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE)) + +/* inline directory entry structure */ +struct f2fs_inline_dentry { + __u8 dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE]; + __u8 reserved[INLINE_RESERVED_SIZE]; + struct f2fs_dir_entry dentry[NR_INLINE_DENTRY]; + __u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN]; +} __packed; + /* file types used in inode_info->flags */ enum { F2FS_FT_UNKNOWN, From 2fc18d83e8551c249c38fc9696072a8412c2e3de Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:16:13 +0800 Subject: [PATCH 012/321] f2fs: add a new mount option for inline dir Adds a new mount option 'inline_dentry' for inline dir. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 76b14c8c..73993a9e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -51,6 +51,7 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_inline_data, + Opt_inline_dentry, Opt_flush_merge, Opt_nobarrier, Opt_err, @@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, + {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, {Opt_err, NULL}, @@ -340,6 +342,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_data: set_opt(sbi, INLINE_DATA); break; + case Opt_inline_dentry: + set_opt(sbi, INLINE_DENTRY); + break; case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; @@ -563,6 +568,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + if (test_opt(sbi, INLINE_DENTRY)) + seq_puts(seq, ",inline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) From 34295c0c442fbe09c552a8e32289a19334a77f3e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:17:04 +0800 Subject: [PATCH 013/321] f2fs: export dir operations for inline dir This patch exports some dir operations for inline dir, additionally introduces f2fs_drop_nlink from f2fs_delete_entry for reusing by inline dir function. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 71 +++++++++++++++++++++++++--------------------- fs/f2fs/f2fs.h | 10 ++++++- fs/f2fs/namei.c | 4 +-- fs/f2fs/recovery.c | 2 +- 4 files changed, 50 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 0bd16f58..e8f01792 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -42,7 +42,7 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } -static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { +unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_UNKNOWN] = DT_UNKNOWN, [F2FS_FT_REG_FILE] = DT_REG, [F2FS_FT_DIR] = DT_DIR, @@ -64,7 +64,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) { umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; @@ -82,7 +82,7 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -static bool early_match_name(size_t namelen, f2fs_hash_t namehash, +bool early_match_name(size_t namelen, f2fs_hash_t namehash, struct f2fs_dir_entry *de) { if (le16_to_cpu(de->name_len) != namelen) @@ -323,7 +323,6 @@ static int make_empty_dir(struct inode *inode, if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap_atomic(dentry_page); de = &dentry_blk->dentry[0]; @@ -349,7 +348,7 @@ static int make_empty_dir(struct inode *inode, return 0; } -static struct page *init_inode_metadata(struct inode *inode, +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { struct page *page; @@ -411,7 +410,7 @@ static struct page *init_inode_metadata(struct inode *inode, return ERR_PTR(err); } -static void update_parent_metadata(struct inode *dir, struct inode *inode, +void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { @@ -576,16 +575,44 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) return err; } +void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + down_write(&F2FS_I(inode)->i_sem); + + if (S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + if (page) + update_inode(dir, page); + else + update_inode_page(dir); + } + inode->i_ctime = CURRENT_TIME; + + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + up_write(&F2FS_I(inode)->i_sem); + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); +} + /* * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *inode) + struct inode *dir, struct inode *inode) { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; - struct inode *dir = page->mapping->host; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; @@ -606,29 +633,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dir->i_ctime = dir->i_mtime = CURRENT_TIME; - if (inode) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - - down_write(&F2FS_I(inode)->i_sem); - - if (S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - update_inode_page(dir); - } - inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); - if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); - i_size_write(inode, 0); - } - up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); - - if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); - else - release_orphan_inode(sbi); - } + if (inode) + f2fs_drop_nlink(dir, inode, NULL); if (bit_pos == NR_DENTRY_IN_BLOCK) { truncate_hole(dir, page->index, page->index + 1); @@ -644,7 +650,7 @@ bool f2fs_empty_dir(struct inode *dir) unsigned long bidx; struct page *dentry_page; unsigned int bit_pos; - struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); for (bidx = 0; bidx < nblock; bidx++) { @@ -656,7 +662,6 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); if (bidx == 0) bit_pos = 2; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b0cd0d12..859afe49 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1242,6 +1242,13 @@ struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ +extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; +bool early_match_name(size_t, f2fs_hash_t, struct f2fs_dir_entry *); +void set_de_type(struct f2fs_dir_entry *, struct inode *); +struct page *init_inode_metadata(struct inode *, struct inode *, + const struct qstr *); +void update_parent_metadata(struct inode *, struct inode *, unsigned int); +void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **, unsigned int flags); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); @@ -1250,7 +1257,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, + struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index beda842e..0ab6f483 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -224,7 +224,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_put_page(page, 0); goto fail; } - f2fs_delete_entry(de, page, inode); + f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); /* In order to evict this inode, we set it dirty */ @@ -436,7 +436,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); - f2fs_delete_entry(old_entry, old_page, NULL); + f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (old_dir_entry) { if (old_dir != new_dir) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 680dd813..e74ed769 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -111,7 +111,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) iput(einode); goto out_unmap_put; } - f2fs_delete_entry(de, page, einode); + f2fs_delete_entry(de, page, dir, einode); iput(einode); goto retry; } From d9e73998d30ddd304eb5756caa9b87628657a87b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:17:53 +0800 Subject: [PATCH 014/321] f2fs: add key function to handle inline dir Adds Functions to implement inline dir init/lookup/insert/delete/convert ops. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 9 ++ fs/f2fs/inline.c | 346 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 355 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 859afe49..50fd4f10 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1551,4 +1551,13 @@ int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); void truncate_inline_data(struct inode *, u64); bool recover_inline_data(struct inode *, struct page *); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); +int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, + struct inode *, struct inode *); +bool f2fs_empty_inline_dir(struct inode *); +int f2fs_read_inline_dir(struct file *, struct dir_context *); #endif diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e3abcfb5..a9b8d7ef 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -258,3 +258,349 @@ bool recover_inline_data(struct inode *inode, struct page *npage) } return false; } + +struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, + struct qstr *name, struct page **res_page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct page *ipage; + struct f2fs_dir_entry *de; + f2fs_hash_t namehash; + unsigned long bit_pos = 0; + struct f2fs_inline_dentry *dentry_blk; + const void *dentry_bits; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + namehash = f2fs_dentry_hash(name); + + dentry_blk = inline_data_addr(ipage); + dentry_bits = &dentry_blk->dentry_bitmap; + + while (bit_pos < NR_INLINE_DENTRY) { + if (!test_bit_le(bit_pos, dentry_bits)) { + bit_pos++; + continue; + } + de = &dentry_blk->dentry[bit_pos]; + if (early_match_name(name->len, namehash, de)) { + if (!memcmp(dentry_blk->filename[bit_pos], + name->name, + name->len)) { + *res_page = ipage; + goto found; + } + } + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs are occurred. + */ + f2fs_bug_on(F2FS_P_SB(ipage), !de->name_len); + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + } + + de = NULL; +found: + unlock_page(ipage); + return de; +} + +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir, + struct page **p) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct f2fs_dir_entry *de; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + dentry_blk = inline_data_addr(ipage); + de = &dentry_blk->dentry[1]; + *p = ipage; + unlock_page(ipage); + return de; +} + +int make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct page *ipage) +{ + struct f2fs_inline_dentry *dentry_blk; + struct f2fs_dir_entry *de; + + dentry_blk = inline_data_addr(ipage); + + de = &dentry_blk->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(dentry_blk->filename[0], ".", 1); + set_de_type(de, inode); + + de = &dentry_blk->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(dentry_blk->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + + set_page_dirty(ipage); + + /* update i_size to MAX_INLINE_DATA */ + if (i_size_read(inode) < MAX_INLINE_DATA) { + i_size_write(inode, MAX_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + } + return 0; +} + +int room_in_inline_dir(struct f2fs_inline_dentry *dentry_blk, int slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + bit_start); + if (zero_start >= NR_INLINE_DENTRY) + return NR_INLINE_DENTRY; + + zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + + bit_start = zero_end + 1; + + if (zero_end + 1 >= NR_INLINE_DENTRY) + return NR_INLINE_DENTRY; + goto next; +} + +static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, + struct f2fs_inline_dentry *inline_dentry) +{ + struct page *page; + struct dnode_of_data dn; + struct f2fs_dentry_block *dentry_blk; + int err; + + page = grab_cache_page(dir->i_mapping, 0); + if (!page) + return -ENOMEM; + + set_new_dnode(&dn, dir, ipage, NULL, 0); + err = f2fs_reserve_block(&dn, 0); + if (err) + goto out; + + f2fs_wait_on_page_writeback(page, DATA); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + + dentry_blk = kmap(page); + + /* copy data from inline dentry block to new dentry block */ + memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, + INLINE_DENTRY_BITMAP_SIZE); + memcpy(dentry_blk->dentry, inline_dentry->dentry, + sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); + memcpy(dentry_blk->filename, inline_dentry->filename, + NR_INLINE_DENTRY * F2FS_SLOT_LEN); + + kunmap(page); + SetPageUptodate(page); + set_page_dirty(page); + + /* clear inline dir and flag after data writeback */ + zero_user_segment(ipage, INLINE_DATA_OFFSET, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + + stat_dec_inline_inode(dir); + + if (i_size_read(dir) < PAGE_CACHE_SIZE) { + i_size_write(dir, PAGE_CACHE_SIZE); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + sync_inode_page(&dn); +out: + f2fs_put_page(page, 1); + return err; +} + +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, + struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos; + f2fs_hash_t name_hash; + struct f2fs_dir_entry *de; + size_t namelen = name->len; + struct f2fs_inline_dentry *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; + int err = 0; + int i; + + name_hash = f2fs_dentry_hash(name); + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + dentry_blk = inline_data_addr(ipage); + bit_pos = room_in_inline_dir(dentry_blk, slots); + if (bit_pos >= NR_INLINE_DENTRY) { + err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + if (!err) + err = -EAGAIN; + goto out; + } + + f2fs_wait_on_page_writeback(ipage, DATA); + + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = name_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(ipage); + + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, 0); +fail: + up_write(&F2FS_I(inode)->i_sem); + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { + update_inode(dir, ipage); + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } +out: + f2fs_put_page(ipage, 1); + return err; +} + +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode) +{ + struct f2fs_inline_dentry *inline_dentry; + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + unsigned int bit_pos; + int i; + + lock_page(page); + f2fs_wait_on_page_writeback(page, DATA); + + inline_dentry = inline_data_addr(page); + bit_pos = dentry - inline_dentry->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, + &inline_dentry->dentry_bitmap); + + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode) + f2fs_drop_nlink(dir, inode, page); + + f2fs_put_page(page, 1); +} + +bool f2fs_empty_inline_dir(struct inode *dir) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos = 2; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return false; + + dentry_blk = inline_data_addr(ipage); + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + bit_pos); + + f2fs_put_page(ipage, 1); + + if (bit_pos < NR_INLINE_DENTRY) + return false; + + return true; +} + +int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) +{ + struct inode *inode = file_inode(file); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int bit_pos = 0; + struct f2fs_inline_dentry *inline_dentry = NULL; + struct f2fs_dir_entry *de = NULL; + struct page *ipage = NULL; + unsigned char d_type = DT_UNKNOWN; + + if (ctx->pos == NR_INLINE_DENTRY) + return 0; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + bit_pos = ((unsigned long)ctx->pos % NR_INLINE_DENTRY); + + inline_dentry = inline_data_addr(ipage); + while (bit_pos < NR_INLINE_DENTRY) { + bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap, + NR_INLINE_DENTRY, + bit_pos); + if (bit_pos >= NR_INLINE_DENTRY) + break; + + de = &inline_dentry->dentry[bit_pos]; + if (de->file_type < F2FS_FT_MAX) + d_type = f2fs_filetype_table[de->file_type]; + else + d_type = DT_UNKNOWN; + + if (!dir_emit(ctx, + inline_dentry->filename[bit_pos], + le16_to_cpu(de->name_len), + le32_to_cpu(de->ino), d_type)) + goto out; + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + ctx->pos = bit_pos; + } + + ctx->pos = NR_INLINE_DENTRY; +out: + f2fs_put_page(ipage, 1); + + return 0; +} From 3bcff7439a00bf90eb5c5298c46f0224794d92c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:19:10 +0800 Subject: [PATCH 015/321] f2fs: enable inline dir handling Add inline dir functions into normal dir ops' function to handle inline ops. Besides, we enable inline dir mode when a new dir inode is created if inline_data option is on. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/dir.c fs/f2fs/namei.c Change-Id: Ic708153b2bacafb5bad359309e4b785a8b83dc54 --- fs/f2fs/dir.c | 32 ++++++++++++++++++++++++++++++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/inline.c | 43 ++++++++++++++++++++++++++----------------- fs/f2fs/namei.c | 22 ++++++++++++++++------ fs/f2fs/recovery.c | 3 ++- 6 files changed, 76 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e8f01792..84dd8980 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -216,6 +216,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct qstr *child, unsigned int max_depth; unsigned int level; + if (f2fs_has_inline_dentry(dir)) + return find_in_inline_dir(dir, child, res_page); + if (npages == 0) return NULL; @@ -243,6 +246,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) struct f2fs_dir_entry *de; struct f2fs_dentry_block *dentry_blk; + if (f2fs_has_inline_dentry(dir)) + return f2fs_parent_inline_dir(dir, p); + page = get_lock_data_page(dir, 0); if (IS_ERR(page)) return NULL; @@ -263,7 +269,8 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) de = f2fs_find_entry(dir, qstr, &page, 0); if (de) { res = le32_to_cpu(de->ino); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); f2fs_put_page(page, 0); } @@ -277,7 +284,8 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, DATA); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -319,6 +327,9 @@ static int make_empty_dir(struct inode *inode, struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; + if (f2fs_has_inline_dentry(inode)) + return make_empty_inline_dir(inode, parent, page); + dentry_page = get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -478,6 +489,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, int err = 0; int i; + if (f2fs_has_inline_dentry(dir)) { + err = f2fs_add_inline_entry(dir, name, inode); + if (!err || err != -EAGAIN) + return err; + else + err = 0; + } + dentry_hash = f2fs_dentry_hash(name); level = 0; current_depth = F2FS_I(dir)->i_current_depth; @@ -616,6 +635,9 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; + if (f2fs_has_inline_dentry(dir)) + return f2fs_delete_inline_entry(dentry, page, dir, inode); + lock_page(page); f2fs_wait_on_page_writeback(page, DATA); @@ -653,6 +675,9 @@ bool f2fs_empty_dir(struct inode *dir) struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); + if (f2fs_has_inline_dentry(dir)) + return f2fs_empty_inline_dir(dir); + for (bidx = 0; bidx < nblock; bidx++) { dentry_page = get_lock_data_page(dir, bidx); if (IS_ERR(dentry_page)) { @@ -696,6 +721,9 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) unsigned char d_type = DT_UNKNOWN; int slots; + if (f2fs_has_inline_dentry(inode)) + return f2fs_read_inline_dir(file, dirent, filldir); + types = f2fs_filetype_table; bit_pos = (pos % NR_DENTRY_IN_BLOCK); n = (pos / NR_DENTRY_IN_BLOCK); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 50fd4f10..174ede79 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1559,5 +1559,5 @@ int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); -int f2fs_read_inline_dir(struct file *, struct dir_context *); +int f2fs_read_inline_dir(struct file *, void *, filldir_t); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 75dcc4c0..057c6dbe 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -331,7 +331,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; /* handle inline data case */ - if (f2fs_has_inline_data(inode)) { + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { if (whence == SEEK_HOLE) data_ofs = isize; goto found; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a9b8d7ef..7b76b519 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -424,7 +424,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, /* clear inline dir and flag after data writeback */ zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); - + clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); stat_dec_inline_inode(dir); if (i_size_read(dir) < PAGE_CACHE_SIZE) { @@ -555,27 +555,34 @@ bool f2fs_empty_inline_dir(struct inode *dir) return true; } -int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) +int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir) { + unsigned long pos = file->f_pos; + unsigned char *types = NULL; + unsigned int bit_pos = 0, start_bit_pos = 0; + int over = 0; struct inode *inode = file_inode(file); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int bit_pos = 0; struct f2fs_inline_dentry *inline_dentry = NULL; - struct f2fs_dir_entry *de = NULL; struct page *ipage = NULL; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_dir_entry *de = NULL; unsigned char d_type = DT_UNKNOWN; + int slots; - if (ctx->pos == NR_INLINE_DENTRY) + if (pos >= NR_INLINE_DENTRY) return 0; + types = f2fs_filetype_table; + bit_pos = (pos % NR_INLINE_DENTRY); + ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); - bit_pos = ((unsigned long)ctx->pos % NR_INLINE_DENTRY); - + start_bit_pos = bit_pos; inline_dentry = inline_data_addr(ipage); while (bit_pos < NR_INLINE_DENTRY) { + d_type = DT_UNKNOWN; bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap, NR_INLINE_DENTRY, bit_pos); @@ -583,22 +590,24 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) break; de = &inline_dentry->dentry[bit_pos]; - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; + if (types && de->file_type < F2FS_FT_MAX) + d_type = types[de->file_type]; - if (!dir_emit(ctx, + over = filldir(dirent, inline_dentry->filename[bit_pos], le16_to_cpu(de->name_len), - le32_to_cpu(de->ino), d_type)) + bit_pos, + le32_to_cpu(de->ino), d_type); + if (over) { + file->f_pos += bit_pos - start_bit_pos; goto out; - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - ctx->pos = bit_pos; + } + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + bit_pos += slots; } - ctx->pos = NR_INLINE_DENTRY; + file->f_pos = NR_INLINE_DENTRY; out: f2fs_put_page(ipage, 1); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0ab6f483..ca9b8ab4 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -55,6 +55,10 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; goto out; } + + if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -188,7 +192,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); if (de) { nid_t ino = le32_to_cpu(de->ino); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); @@ -220,7 +225,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); f2fs_put_page(page, 0); goto fail; } @@ -444,7 +450,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - kunmap(old_dir_page); + if (!f2fs_has_inline_dentry(old_inode)) + kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -457,15 +464,18 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - kunmap(new_page); + if (!f2fs_has_inline_dentry(new_dir)) + kunmap(new_page); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - kunmap(old_dir_page); + if (!f2fs_has_inline_dentry(old_inode)) + kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } out_old: - kunmap(old_page); + if (!f2fs_has_inline_dentry(old_dir)) + kunmap(old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e74ed769..3ab8d822 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -129,7 +129,8 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out; out_unmap_put: - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); f2fs_put_page(page, 0); out_err: iput(dir); From 9e58cb12a65f21f1af1b79b351c4d5f3cee0c29b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:20:23 +0800 Subject: [PATCH 016/321] f2fs: update f2fs documentation for inline dir support This patch adds descriptions for the inline dir support in f2fs document. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index d9086e6d..7067a539 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -122,6 +122,10 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs inline_xattr Enable the inline xattrs feature. inline_data Enable the inline data feature: New created small(<~3.4k) files can be written into inode block. +inline_dentry Enable the inline dir feature: data in new created + directory entries can be written into inode block. The + space of inode block which is used to store inline + dentries is limited to ~3.4k. flush_merge Merge concurrent cache_flush commands as much as possible to eliminate redundant command issues. If the underlying device handles the cache_flush command relatively slowly, From b0666f007f9f29295fb2ebaba498194463421d9b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 16:28:13 -0700 Subject: [PATCH 017/321] f2fs: reuse room_for_filename for inline dentry operation This patch introduces to reuse the existing room_for_filename for inline dentry operation. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/inline.c --- fs/f2fs/dir.c | 21 +++++++++------------ fs/f2fs/f2fs.h | 1 + fs/f2fs/inline.c | 27 ++------------------------- 3 files changed, 12 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 84dd8980..cb8fe05f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -443,27 +443,23 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } -static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +int room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_start); - if (zero_start >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - zero_start); + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); if (zero_end - zero_start >= slots) return zero_start; bit_start = zero_end + 1; - if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + if (zero_end + 1 >= max_slots) + return max_slots; goto next; } @@ -525,7 +521,8 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, return PTR_ERR(dentry_page); dentry_blk = kmap(dentry_page); - bit_pos = room_for_filename(dentry_blk, slots); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 174ede79..48f37c80 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1248,6 +1248,7 @@ void set_de_type(struct f2fs_dir_entry *, struct inode *); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); +int room_for_filename(const void *, int, int); void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **, unsigned int flags); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7b76b519..10ef7b78 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -363,30 +363,6 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, return 0; } -int room_in_inline_dir(struct f2fs_inline_dentry *dentry_blk, int slots) -{ - int bit_start = 0; - int zero_start, zero_end; -next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_INLINE_DENTRY, - bit_start); - if (zero_start >= NR_INLINE_DENTRY) - return NR_INLINE_DENTRY; - - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_INLINE_DENTRY, - zero_start); - if (zero_end - zero_start >= slots) - return zero_start; - - bit_start = zero_end + 1; - - if (zero_end + 1 >= NR_INLINE_DENTRY) - return NR_INLINE_DENTRY; - goto next; -} - static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, struct f2fs_inline_dentry *inline_dentry) { @@ -460,7 +436,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, return PTR_ERR(ipage); dentry_blk = inline_data_addr(ipage); - bit_pos = room_in_inline_dir(dentry_blk, slots); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_INLINE_DENTRY); if (bit_pos >= NR_INLINE_DENTRY) { err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); if (!err) From 37c8aabdd09477b64af87df9395c96e7a2378de6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 17:26:14 -0700 Subject: [PATCH 018/321] f2fs: reuse find_in_block code for find_in_inline_dir This patch removes redundant copied code in find_in_inline_dir. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/dir.c Change-Id: I48fad8f4b1a512b8a4f276930f021a221f8a0164 --- fs/f2fs/dir.c | 82 +++++++++++++++++++++++++++++------------------- fs/f2fs/f2fs.h | 6 ++-- fs/f2fs/inline.c | 56 ++++++++++++--------------------- 3 files changed, 74 insertions(+), 70 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cb8fe05f..15d42bf8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -82,7 +82,7 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -bool early_match_name(size_t namelen, f2fs_hash_t namehash, +static bool early_match_name(size_t namelen, f2fs_hash_t namehash, struct f2fs_dir_entry *de) { if (le16_to_cpu(de->name_len) != namelen) @@ -95,58 +95,76 @@ bool early_match_name(size_t namelen, f2fs_hash_t namehash, } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct qstr *name, int *max_slots, - f2fs_hash_t namehash, struct page **res_page, - unsigned int flags) + struct qstr *name, int *max_slots, + struct page **res_page, + unsigned int flags) +{ + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + + *max_slots = NR_DENTRY_IN_BLOCK; + + dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); + de = find_target_dentry(name, max_slots, &dentry_blk->dentry_bitmap, + dentry_blk->dentry, + dentry_blk->filename, + flags); + if (de) + *res_page = dentry_page; + else + kunmap(dentry_page); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. + */ + f2fs_bug_on(F2FS_P_SB(dentry_page), *max_slots < 0); + return de; +} + +struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, + const void *bitmap, struct f2fs_dir_entry *dentry, + __u8 (*filenames)[F2FS_SLOT_LEN], unsigned int flags) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; - struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); - const void *dentry_bits = &dentry_blk->dentry_bitmap; + f2fs_hash_t namehash = f2fs_dentry_hash(name); + int max_bits = *max_slots; int max_len = 0; - while (bit_pos < NR_DENTRY_IN_BLOCK) { - if (!test_bit_le(bit_pos, dentry_bits)) { + *max_slots = 0; + while (bit_pos < max_bits) { + if (!test_bit_le(bit_pos, bitmap)) { if (bit_pos == 0) max_len = 1; - else if (!test_bit_le(bit_pos - 1, dentry_bits)) + else if (!test_bit_le(bit_pos - 1, bitmap)) max_len++; bit_pos++; continue; } - de = &dentry_blk->dentry[bit_pos]; - + de = &dentry[bit_pos]; if (flags & LOOKUP_NOCASE) { if ((le16_to_cpu(de->name_len) == name->len) && - !strncasecmp(dentry_blk->filename[bit_pos], - name->name, name->len)) { - *res_page = dentry_page; + !strncasecmp(filenames[bit_pos], + name->name, name->len)) goto found; - } - } else if (early_match_name(name->len, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name->name, - name->len)) { - *res_page = dentry_page; - goto found; - } + } else if (early_match_name(name->len, namehash, de) && + !memcmp(filenames[bit_pos], name->name, name->len)) { + goto found; } - if (max_len > *max_slots) { + if (*max_slots >= 0 && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } - /* - * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs has occurred. - */ - f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); + /* remain bug on condition */ + if (unlikely(!de->name_len)) + *max_slots = -1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; - kunmap(dentry_page); found: if (max_len > *max_slots) *max_slots = max_len; @@ -164,7 +182,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; - int max_slots = 0; + int max_slots; f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -184,7 +202,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } de = find_in_block(dentry_page, name, &max_slots, - namehash, res_page, flags); + res_page, flags); if (de) break; @@ -217,7 +235,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct qstr *child, unsigned int level; if (f2fs_has_inline_dentry(dir)) - return find_in_inline_dir(dir, child, res_page); + return find_in_inline_dir(dir, child, res_page, flags); if (npages == 0) return NULL; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 48f37c80..d278ec2b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1243,8 +1243,10 @@ struct dentry *f2fs_get_parent(struct dentry *child); * dir.c */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; -bool early_match_name(size_t, f2fs_hash_t, struct f2fs_dir_entry *); void set_de_type(struct f2fs_dir_entry *, struct inode *); +struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, + struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN], + unsigned int); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); @@ -1553,7 +1555,7 @@ int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); void truncate_inline_data(struct inode *, u64); bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, - struct page **); + struct page **, unsigned int); struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 10ef7b78..7b30a916 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -260,52 +260,36 @@ bool recover_inline_data(struct inode *inode, struct page *npage) } struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, - struct qstr *name, struct page **res_page) + struct qstr *name, struct page **res_page, + unsigned int flags) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - struct page *ipage; + struct f2fs_inline_dentry *inline_dentry; struct f2fs_dir_entry *de; - f2fs_hash_t namehash; - unsigned long bit_pos = 0; - struct f2fs_inline_dentry *dentry_blk; - const void *dentry_bits; + struct page *ipage; + int max_slots = NR_INLINE_DENTRY; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return NULL; - namehash = f2fs_dentry_hash(name); - - dentry_blk = inline_data_addr(ipage); - dentry_bits = &dentry_blk->dentry_bitmap; - - while (bit_pos < NR_INLINE_DENTRY) { - if (!test_bit_le(bit_pos, dentry_bits)) { - bit_pos++; - continue; - } - de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name->name, - name->len)) { - *res_page = ipage; - goto found; - } - } - - /* - * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs are occurred. - */ - f2fs_bug_on(F2FS_P_SB(ipage), !de->name_len); - - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - } + inline_dentry = inline_data_addr(ipage); - de = NULL; -found: + de = find_target_dentry(name, &max_slots, &inline_dentry->dentry_bitmap, + inline_dentry->dentry, + inline_dentry->filename, + flags); unlock_page(ipage); + if (de) + *res_page = ipage; + else + f2fs_put_page(ipage, 0); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. + */ + f2fs_bug_on(sbi, max_slots < 0); return de; } From dcc8b98d1d128df8231ea84afe3beaa3a7db0cf4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 19:34:26 -0700 Subject: [PATCH 019/321] f2fs: fix to wait correct block type The inode page needs to wait NODE block io. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 ++- fs/f2fs/inline.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 15d42bf8..c97dc2c0 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -298,8 +298,9 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { + enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, DATA); + f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); if (!f2fs_has_inline_dentry(dir)) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7b30a916..f6793213 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -429,7 +429,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, goto out; } - f2fs_wait_on_page_writeback(ipage, DATA); + f2fs_wait_on_page_writeback(ipage, NODE); down_write(&F2FS_I(inode)->i_sem); page = init_inode_metadata(inode, dir, name); @@ -474,7 +474,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, int i; lock_page(page); - f2fs_wait_on_page_writeback(page, DATA); + f2fs_wait_on_page_writeback(page, NODE); inline_dentry = inline_data_addr(page); bit_pos = dentry - inline_dentry->dentry; From 65f3e6fd955e364da7354d43b645084adf50d2dc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 19:42:53 -0700 Subject: [PATCH 020/321] f2fs: avoid deadlock on init_inode_metadata Previously, init_inode_metadata does not hold any parent directory's inode page. So, f2fs_init_acl can grab its parent inode page without any problem. But, when we use inline_dentry, that page is grabbed during f2fs_add_link, so that we can fall into deadlock condition like below. INFO: task mknod:11006 blocked for more than 120 seconds. Tainted: G OE 3.17.0-rc1+ #13 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. mknod D ffff88003fc94580 0 11006 11004 0x00000000 ffff880007717b10 0000000000000002 ffff88003c323220 ffff880007717fd8 0000000000014580 0000000000014580 ffff88003daecb30 ffff88003c323220 ffff88003fc94e80 ffff88003ffbb4e8 ffff880007717ba0 0000000000000002 Call Trace: [] ? bit_wait+0x50/0x50 [] io_schedule+0x9d/0x130 [] bit_wait_io+0x2c/0x50 [] __wait_on_bit_lock+0x4b/0xb0 [] __lock_page+0x67/0x70 [] ? autoremove_wake_function+0x40/0x40 [] pagecache_get_page+0x14c/0x1e0 [] get_node_page+0x59/0x130 [f2fs] [] read_all_xattrs+0x24d/0x430 [f2fs] [] f2fs_getxattr+0x52/0xe0 [f2fs] [] f2fs_get_acl+0x41/0x2d0 [f2fs] [] get_acl+0x47/0x70 [] posix_acl_create+0x5a/0x150 [] f2fs_init_acl+0x29/0xcb [f2fs] [] init_inode_metadata+0x5d/0x340 [f2fs] [] f2fs_add_inline_entry+0x12a/0x2e0 [f2fs] [] __f2fs_add_link+0x45/0x4a0 [f2fs] [] ? f2fs_new_inode+0x146/0x220 [f2fs] [] f2fs_mknod+0x86/0xf0 [f2fs] [] vfs_mknod+0xe1/0x160 [] SyS_mknod+0x1f6/0x200 [] tracesys+0xe1/0xe6 Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/acl.c fs/f2fs/acl.h Change-Id: Icf4a1343fac1864e537913849c0e3d67c7e83504 --- fs/f2fs/acl.c | 18 +++++++++++++----- fs/f2fs/acl.h | 5 +++-- fs/f2fs/dir.c | 10 +++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inline.c | 6 +++--- fs/f2fs/xattr.c | 6 +++--- fs/f2fs/xattr.h | 6 ++++-- 7 files changed, 32 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 81890804..fbae6b5e 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -162,7 +162,8 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) return ERR_PTR(-EINVAL); } -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, + struct page *dpage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; @@ -180,12 +181,13 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); if (retval > 0) { value = kmalloc(retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); - retval = f2fs_getxattr(inode, name_index, "", value, retval); + retval = f2fs_getxattr(inode, name_index, "", value, + retval, dpage); } if (retval > 0) @@ -202,6 +204,11 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + return __f2fs_get_acl(inode, type, NULL); +} + static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { @@ -258,7 +265,8 @@ static int f2fs_set_acl(struct inode *inode, int type, return error; } -int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, + struct page *dpage) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct posix_acl *acl = NULL; @@ -266,7 +274,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) if (!S_ISLNK(inode->i_mode)) { if (test_opt(sbi, POSIX_ACL)) { - acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); + acl = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); if (IS_ERR(acl)) return PTR_ERR(acl); } diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 49633131..b4ba6866 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -38,7 +38,8 @@ struct f2fs_acl_header { extern struct posix_acl *f2fs_get_acl(struct inode *, int); extern int f2fs_acl_chmod(struct inode *); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); +extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, + struct page *); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL @@ -50,7 +51,7 @@ static inline int f2fs_acl_chmod(struct inode *inode) } static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *page) + struct page *ipage, struct page *dpage) { return 0; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c97dc2c0..e14889df 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -378,8 +378,8 @@ static int make_empty_dir(struct inode *inode, return 0; } -struct page *init_inode_metadata(struct inode *inode, - struct inode *dir, const struct qstr *name) +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, + const struct qstr *name, struct page *dpage) { struct page *page; int err; @@ -395,7 +395,7 @@ struct page *init_inode_metadata(struct inode *inode, goto error; } - err = f2fs_init_acl(inode, dir, page); + err = f2fs_init_acl(inode, dir, page, dpage); if (err) goto put_error; @@ -556,7 +556,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, f2fs_wait_on_page_writeback(dentry_page, DATA); down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name); + page = init_inode_metadata(inode, dir, name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -595,7 +595,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL); + page = init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d278ec2b..d9517637 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1248,7 +1248,7 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN], unsigned int); struct page *init_inode_metadata(struct inode *, struct inode *, - const struct qstr *); + const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); int room_for_filename(const void *, int, int); void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index f6793213..7899548d 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -429,14 +429,14 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, goto out; } - f2fs_wait_on_page_writeback(ipage, NODE); - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name); + page = init_inode_metadata(inode, dir, name, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; } + + f2fs_wait_on_page_writeback(ipage, NODE); de = &dentry_blk->dentry[bit_pos]; de->hash_code = name_hash; de->name_len = cpu_to_le16(namelen); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 55be5934..2f49a58a 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -82,7 +82,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -397,7 +397,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } int f2fs_getxattr(struct inode *inode, int index, const char *name, - void *buffer, size_t buffer_size) + void *buffer, size_t buffer_size, struct page *ipage) { struct f2fs_xattr_entry *entry; void *base_addr; @@ -411,7 +411,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - base_addr = read_all_xattrs(inode, NULL); + base_addr = read_all_xattrs(inode, ipage); if (!base_addr) return -ENOMEM; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 9b18c07a..95b55a0c 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -117,7 +117,8 @@ extern const struct xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, const void *, size_t, struct page *, int); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, + size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else @@ -128,7 +129,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index, return -EOPNOTSUPP; } static inline int f2fs_getxattr(struct inode *inode, int index, - const char *name, void *buffer, size_t buffer_size) + const char *name, void *buffer, + size_t buffer_size, struct page *dpage) { return -EOPNOTSUPP; } From b9e44449e707bf1927552cac5310611e897d4c3a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 20:00:16 -0700 Subject: [PATCH 021/321] f2fs: add stat info for inline_dentry inodes This patch adds status information for inline_dentry inodes. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 +++ fs/f2fs/f2fs.h | 16 ++++++++++++++-- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 3 +++ fs/f2fs/namei.c | 1 + 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0a91ab81..86e6e926 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -46,6 +46,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = valid_inode_count(sbi); si->inline_inode = sbi->inline_inode; + si->inline_dir = sbi->inline_dir; si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -200,6 +201,8 @@ static int stat_show(struct seq_file *s, void *v) si->valid_count - si->valid_node_count); seq_printf(s, " - Inline_data Inode: %u\n", si->inline_inode); + seq_printf(s, " - Inline_dentry Inode: %u\n", + si->inline_dir); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d9517637..5bec5622 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -560,6 +560,7 @@ struct f2fs_sb_info { unsigned int block_count[2]; /* # of allocated blocks */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ int inline_inode; /* # of inline_data inodes */ + int inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -1435,7 +1436,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, sits, fnids; int total_count, utilization; - int bg_gc, inline_inode; + int bg_gc, inline_inode, inline_dir; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -1475,7 +1476,16 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_data(inode)) \ ((F2FS_I_SB(inode))->inline_inode--); \ } while (0) - +#define stat_inc_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + ((F2FS_I_SB(inode))->inline_dir++); \ + } while (0) +#define stat_dec_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + ((F2FS_I_SB(inode))->inline_dir--); \ + } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -1522,6 +1532,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_read_hit(sb) #define stat_inc_inline_inode(inode) #define stat_dec_inline_inode(inode) +#define stat_inc_inline_dir(inode) +#define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_seg_count(si, type) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7899548d..d34516bf 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -384,8 +384,8 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, /* clear inline dir and flag after data writeback */ zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); + stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); - stat_dec_inline_inode(dir); if (i_size_read(dir) < PAGE_CACHE_SIZE) { i_size_write(dir, PAGE_CACHE_SIZE); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c8b71ec7..703ee904 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -168,6 +168,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) goto bad_inode; } unlock_new_inode(inode); + stat_inc_inline_dir(inode); trace_f2fs_iget(inode); return inode; @@ -299,6 +300,7 @@ void f2fs_evict_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); no_delete: + stat_dec_inline_dir(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); @@ -326,6 +328,7 @@ void handle_failed_inode(struct inode *inode) remove_inode_page(inode); stat_dec_inline_inode(inode); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index ca9b8ab4..3adec0a9 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -298,6 +298,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); + stat_inc_inline_dir(inode); alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); From b02a3b88f6b9db0e1020ccca4220a67875bbc59f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 Oct 2014 10:29:50 -0700 Subject: [PATCH 022/321] f2fs: fix counting inline_data inode numbers This patch fixes wrongly counting inline_data inode numbers. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 4 ++-- fs/f2fs/namei.c | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 703ee904..94470b65 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -168,6 +168,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) goto bad_inode; } unlock_new_inode(inode); + stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); trace_f2fs_iget(inode); return inode; @@ -295,12 +296,12 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); remove_inode_page(inode); - stat_dec_inline_inode(inode); f2fs_unlock_op(sbi); sb_end_intwrite(inode->i_sb); no_delete: stat_dec_inline_dir(inode); + stat_dec_inline_inode(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); @@ -326,7 +327,6 @@ void handle_failed_inode(struct inode *inode) f2fs_truncate(inode); remove_inode_page(inode); - stat_dec_inline_inode(inode); clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 3adec0a9..6b277d7a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -199,8 +199,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); - - stat_inc_inline_inode(inode); } return d_splice_alias(inode, dentry); From cc6d0bafef86c5b948e08367169d1636ef005704 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Oct 2014 21:29:51 -0700 Subject: [PATCH 023/321] f2fs: reuse core function in f2fs_readdir for inline_dentry This patch introduces a core function, f2fs_fill_dentries, to remove redundant code in f2fs_readdir and f2fs_read_inline_dir. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/dir.c fs/f2fs/f2fs.h fs/f2fs/inline.c Change-Id: I40a47248235052c5ef9b53d470fb97e20642663e --- fs/f2fs/dir.c | 78 ++++++++++++++++++++++++++++-------------------- fs/f2fs/f2fs.h | 4 +++ fs/f2fs/inline.c | 47 ++++++----------------------- 3 files changed, 58 insertions(+), 71 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e14889df..c8c2666e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -721,26 +721,55 @@ bool f2fs_empty_dir(struct inode *dir) return true; } +bool f2fs_fill_dentries(struct file *file, void *dirent, filldir_t filldir, + const void *bitmap, struct f2fs_dir_entry *dentry, + __u8 (*filenames)[F2FS_SLOT_LEN], int max, + unsigned int n, unsigned int bit_pos) +{ + unsigned int start_bit_pos = bit_pos; + unsigned char d_type; + struct f2fs_dir_entry *de = NULL; + unsigned char *types = f2fs_filetype_table; + int over; + + while (bit_pos < max) { + d_type = DT_UNKNOWN; + bit_pos = find_next_bit_le(bitmap, max, bit_pos); + if (bit_pos >= max) + break; + + de = &dentry[bit_pos]; + if (types && de->file_type < F2FS_FT_MAX) + d_type = types[de->file_type]; + + over = filldir(dirent, filenames[bit_pos], + le16_to_cpu(de->name_len), + (n * max) + bit_pos, + le32_to_cpu(de->ino), d_type); + if (over) { + file->f_pos += bit_pos - start_bit_pos; + return true; + } + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + } + return false; +} + static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) { unsigned long pos = file->f_pos; - unsigned char *types = NULL; - unsigned int bit_pos = 0, start_bit_pos = 0; - int over = 0; + unsigned int bit_pos = 0; struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); struct f2fs_dentry_block *dentry_blk = NULL; - struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; unsigned int n = 0; - unsigned char d_type = DT_UNKNOWN; - int slots; if (f2fs_has_inline_dentry(inode)) return f2fs_read_inline_dir(file, dirent, filldir); - types = f2fs_filetype_table; bit_pos = (pos % NR_DENTRY_IN_BLOCK); n = (pos / NR_DENTRY_IN_BLOCK); @@ -754,35 +783,18 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) if (IS_ERR(dentry_page)) continue; - start_bit_pos = bit_pos; dentry_blk = kmap(dentry_page); - while (bit_pos < NR_DENTRY_IN_BLOCK) { - d_type = DT_UNKNOWN; - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_pos); - if (bit_pos >= NR_DENTRY_IN_BLOCK) - break; - - de = &dentry_blk->dentry[bit_pos]; - if (types && de->file_type < F2FS_FT_MAX) - d_type = types[de->file_type]; - - over = filldir(dirent, - dentry_blk->filename[bit_pos], - le16_to_cpu(de->name_len), - (n * NR_DENTRY_IN_BLOCK) + bit_pos, - le32_to_cpu(de->ino), d_type); - if (over) { - file->f_pos += bit_pos - start_bit_pos; - goto stop; - } - slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - bit_pos += slots; - } + + if (f2fs_fill_dentries(file, dirent, filldir, + &dentry_blk->dentry_bitmap, dentry_blk->dentry, + dentry_blk->filename, + NR_DENTRY_IN_BLOCK, + n, bit_pos)) + goto stop; + bit_pos = 0; - kunmap(dentry_page); file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; + kunmap(dentry_page); f2fs_put_page(dentry_page, 1); dentry_page = NULL; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5bec5622..55b4e393 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1248,6 +1248,10 @@ void set_de_type(struct f2fs_dir_entry *, struct inode *); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN], unsigned int); +bool f2fs_fill_dentries(struct file *, void *, filldir_t, + const void *, struct f2fs_dir_entry *, + __u8 (*)[F2FS_SLOT_LEN], int, + unsigned int, unsigned int); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d34516bf..981ef37c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -519,58 +519,29 @@ bool f2fs_empty_inline_dir(struct inode *dir) int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir) { unsigned long pos = file->f_pos; - unsigned char *types = NULL; - unsigned int bit_pos = 0, start_bit_pos = 0; - int over = 0; + unsigned int bit_pos = 0; struct inode *inode = file_inode(file); struct f2fs_inline_dentry *inline_dentry = NULL; struct page *ipage = NULL; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_dir_entry *de = NULL; - unsigned char d_type = DT_UNKNOWN; - int slots; if (pos >= NR_INLINE_DENTRY) return 0; - types = f2fs_filetype_table; bit_pos = (pos % NR_INLINE_DENTRY); - ipage = get_node_page(sbi, inode->i_ino); + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); - start_bit_pos = bit_pos; inline_dentry = inline_data_addr(ipage); - while (bit_pos < NR_INLINE_DENTRY) { - d_type = DT_UNKNOWN; - bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap, - NR_INLINE_DENTRY, - bit_pos); - if (bit_pos >= NR_INLINE_DENTRY) - break; - - de = &inline_dentry->dentry[bit_pos]; - if (types && de->file_type < F2FS_FT_MAX) - d_type = types[de->file_type]; - - over = filldir(dirent, - inline_dentry->filename[bit_pos], - le16_to_cpu(de->name_len), - bit_pos, - le32_to_cpu(de->ino), d_type); - if (over) { - file->f_pos += bit_pos - start_bit_pos; - goto out; - - } - slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - bit_pos += slots; - } - file->f_pos = NR_INLINE_DENTRY; -out: - f2fs_put_page(ipage, 1); + if (!f2fs_fill_dentries(file, dirent, filldir, + &inline_dentry->dentry_bitmap, + inline_dentry->dentry, + inline_dentry->filename, + NR_INLINE_DENTRY, 0, bit_pos)) + file->f_pos = NR_INLINE_DENTRY; + f2fs_put_page(ipage, 1); return 0; } From 28e065754be7e44b0b49f1c0284a54f35ca83098 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Oct 2014 12:24:14 -0700 Subject: [PATCH 024/321] f2fs: should not truncate any inline_dentry If the inode has inline_dentry, we should not truncate any block indices. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 057c6dbe..658b02ea 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -488,7 +488,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - if (f2fs_has_inline_data(inode)) + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) goto done; free_from = (pgoff_t) From 694efffdb72d4ac57f92eede01a835716fb11bb0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 18 Oct 2014 22:52:52 -0700 Subject: [PATCH 025/321] f2fs: introduce f2fs_dentry_ptr structure for code clean-up This patch introduces f2fs_dentry_ptr structure for the use of a function parameter in inline_dentry operations. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/dir.c fs/f2fs/f2fs.h fs/f2fs/inline.c Change-Id: I2cbe27982f25257f3b7ed1987a789c4ce6ec6724 --- fs/f2fs/dir.c | 64 +++++++++++++++++++++++------------------------- fs/f2fs/f2fs.h | 35 +++++++++++++++++++++----- fs/f2fs/inline.c | 20 +++++++-------- 3 files changed, 68 insertions(+), 51 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c8c2666e..31779169 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -101,14 +101,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, { struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; - - *max_slots = NR_DENTRY_IN_BLOCK; + struct f2fs_dentry_ptr d; dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - de = find_target_dentry(name, max_slots, &dentry_blk->dentry_bitmap, - dentry_blk->dentry, - dentry_blk->filename, - flags); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + de = find_target_dentry(name, max_slots, &d, flags); + if (de) *res_page = dentry_page; else @@ -118,55 +117,55 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, * For the most part, it should be a bug when name_len is zero. * We stop here for figuring out where the bugs has occurred. */ - f2fs_bug_on(F2FS_P_SB(dentry_page), *max_slots < 0); + f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0); return de; } struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, - const void *bitmap, struct f2fs_dir_entry *dentry, - __u8 (*filenames)[F2FS_SLOT_LEN], unsigned int flags) + struct f2fs_dentry_ptr *d, unsigned int flags) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; f2fs_hash_t namehash = f2fs_dentry_hash(name); - int max_bits = *max_slots; int max_len = 0; - *max_slots = 0; - while (bit_pos < max_bits) { - if (!test_bit_le(bit_pos, bitmap)) { + if (max_slots) + *max_slots = 0; + while (bit_pos < d->max) { + if (!test_bit_le(bit_pos, d->bitmap)) { if (bit_pos == 0) max_len = 1; - else if (!test_bit_le(bit_pos - 1, bitmap)) + else if (!test_bit_le(bit_pos - 1, d->bitmap)) max_len++; bit_pos++; continue; } - de = &dentry[bit_pos]; + de = &d->dentry[bit_pos]; if (flags & LOOKUP_NOCASE) { if ((le16_to_cpu(de->name_len) == name->len) && - !strncasecmp(filenames[bit_pos], + !strncasecmp(d->filename[bit_pos], name->name, name->len)) goto found; } else if (early_match_name(name->len, namehash, de) && - !memcmp(filenames[bit_pos], name->name, name->len)) { + !memcmp(d->filename[bit_pos], name->name, name->len)) { goto found; } - if (*max_slots >= 0 && max_len > *max_slots) { + + if (max_slots && *max_slots >= 0 && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } /* remain bug on condition */ if (unlikely(!de->name_len)) - *max_slots = -1; + d->max = -1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; found: - if (max_len > *max_slots) + if (max_slots && max_len > *max_slots) *max_slots = max_len; return de; } @@ -722,9 +721,7 @@ bool f2fs_empty_dir(struct inode *dir) } bool f2fs_fill_dentries(struct file *file, void *dirent, filldir_t filldir, - const void *bitmap, struct f2fs_dir_entry *dentry, - __u8 (*filenames)[F2FS_SLOT_LEN], int max, - unsigned int n, unsigned int bit_pos) + struct f2fs_dentry_ptr *d, unsigned int n, unsigned int bit_pos) { unsigned int start_bit_pos = bit_pos; unsigned char d_type; @@ -732,19 +729,19 @@ bool f2fs_fill_dentries(struct file *file, void *dirent, filldir_t filldir, unsigned char *types = f2fs_filetype_table; int over; - while (bit_pos < max) { + while (bit_pos < d->max) { d_type = DT_UNKNOWN; - bit_pos = find_next_bit_le(bitmap, max, bit_pos); - if (bit_pos >= max) + bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); + if (bit_pos >= d->max) break; - de = &dentry[bit_pos]; + de = &d->dentry[bit_pos]; if (types && de->file_type < F2FS_FT_MAX) d_type = types[de->file_type]; - over = filldir(dirent, filenames[bit_pos], + over = filldir(dirent, d->filename[bit_pos], le16_to_cpu(de->name_len), - (n * max) + bit_pos, + (n * d->max) + bit_pos, le32_to_cpu(de->ino), d_type); if (over) { file->f_pos += bit_pos - start_bit_pos; @@ -765,6 +762,7 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) struct f2fs_dentry_block *dentry_blk = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; + struct f2fs_dentry_ptr d; unsigned int n = 0; if (f2fs_has_inline_dentry(inode)) @@ -785,11 +783,9 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) dentry_blk = kmap(dentry_page); - if (f2fs_fill_dentries(file, dirent, filldir, - &dentry_blk->dentry_bitmap, dentry_blk->dentry, - dentry_blk->filename, - NR_DENTRY_IN_BLOCK, - n, bit_pos)) + make_dentry_ptr(&d, (void *)dentry_blk, 1); + + if (f2fs_fill_dentries(file, dirent, filldir, &d, n, bit_pos)) goto stop; bit_pos = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 55b4e393..0a24b22a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -212,6 +212,32 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, /* * For INODE and NODE manager */ +/* for directory operations */ +struct f2fs_dentry_ptr { + const void *bitmap; + struct f2fs_dir_entry *dentry; + __u8 (*filename)[F2FS_SLOT_LEN]; + int max; +}; + +static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d, + void *src, int type) +{ + if (type == 1) { + struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; + d->max = NR_DENTRY_IN_BLOCK; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } else { + struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; + d->max = NR_INLINE_DENTRY; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } +} + /* * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 * as its node offset to distinguish from index node blocks. @@ -1245,13 +1271,10 @@ struct dentry *f2fs_get_parent(struct dentry *child); */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; void set_de_type(struct f2fs_dir_entry *, struct inode *); -struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, - struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN], - unsigned int); +struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, + struct f2fs_dentry_ptr *, unsigned int); bool f2fs_fill_dentries(struct file *, void *, filldir_t, - const void *, struct f2fs_dir_entry *, - __u8 (*)[F2FS_SLOT_LEN], int, - unsigned int, unsigned int); + struct f2fs_dentry_ptr *, unsigned int, unsigned int); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 981ef37c..b261f26d 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -266,8 +266,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_inline_dentry *inline_dentry; struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; struct page *ipage; - int max_slots = NR_INLINE_DENTRY; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) @@ -275,10 +275,9 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(ipage); - de = find_target_dentry(name, &max_slots, &inline_dentry->dentry_bitmap, - inline_dentry->dentry, - inline_dentry->filename, - flags); + make_dentry_ptr(&d, (void *)inline_dentry, 2); + de = find_target_dentry(name, NULL, &d, flags); + unlock_page(ipage); if (de) *res_page = ipage; @@ -289,7 +288,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, * For the most part, it should be a bug when name_len is zero. * We stop here for figuring out where the bugs has occurred. */ - f2fs_bug_on(sbi, max_slots < 0); + f2fs_bug_on(sbi, d.max < 0); return de; } @@ -523,6 +522,7 @@ int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir) struct inode *inode = file_inode(file); struct f2fs_inline_dentry *inline_dentry = NULL; struct page *ipage = NULL; + struct f2fs_dentry_ptr d; if (pos >= NR_INLINE_DENTRY) return 0; @@ -535,11 +535,9 @@ int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir) inline_dentry = inline_data_addr(ipage); - if (!f2fs_fill_dentries(file, dirent, filldir, - &inline_dentry->dentry_bitmap, - inline_dentry->dentry, - inline_dentry->filename, - NR_INLINE_DENTRY, 0, bit_pos)) + make_dentry_ptr(&d, (void *)inline_dentry, 2); + + if (!f2fs_fill_dentries(file, dirent, filldir, &d, 0, bit_pos)) file->f_pos = NR_INLINE_DENTRY; f2fs_put_page(ipage, 1); From b02531f7da052349ead9e551eacccd84718644a4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 18 Oct 2014 23:06:41 -0700 Subject: [PATCH 026/321] f2fs: reuse make_empty_dir code for inline_dentry This patch introduces do_make_empty_dir to mitigate code redundancy for inline_dentry. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/f2fs.h Change-Id: I4cbb1a287cf2b92b773acd2a413220cab73e68c7 --- fs/f2fs/dir.c | 42 ++++++++++++++++++++++++++---------------- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/inline.c | 20 +++----------------- 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 31779169..6759af7e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -338,12 +338,35 @@ int update_dent_inode(struct inode *inode, const struct qstr *name) return 0; } +void do_make_empty_dir(struct inode *inode, struct inode *parent, + struct f2fs_dentry_ptr *d) +{ + struct f2fs_dir_entry *de; + + de = &d->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(d->filename[0], ".", 1); + set_de_type(de, inode); + + de = &d->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(d->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, (void *)d->bitmap); + test_and_set_bit_le(1, (void *)d->bitmap); +} + static int make_empty_dir(struct inode *inode, struct inode *parent, struct page *page) { struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; - struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) return make_empty_inline_dir(inode, parent, page); @@ -354,22 +377,9 @@ static int make_empty_dir(struct inode *inode, dentry_blk = kmap_atomic(dentry_page); - de = &dentry_blk->dentry[0]; - de->name_len = cpu_to_le16(1); - de->hash_code = 0; - de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); - set_de_type(de, inode); - - de = &dentry_blk->dentry[1]; - de->hash_code = 0; - de->name_len = cpu_to_le16(2); - de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); - set_de_type(de, inode); + make_dentry_ptr(&d, (void *)dentry_blk, 1); + do_make_empty_dir(inode, parent, &d); - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0a24b22a..2c2d86e1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1275,6 +1275,8 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, struct f2fs_dentry_ptr *, unsigned int); bool f2fs_fill_dentries(struct file *, void *, filldir_t, struct f2fs_dentry_ptr *, unsigned int, unsigned int); +void do_make_empty_dir(struct inode *, struct inode *, + struct f2fs_dentry_ptr *); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index b261f26d..39db1998 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -315,26 +315,12 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { struct f2fs_inline_dentry *dentry_blk; - struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; dentry_blk = inline_data_addr(ipage); - de = &dentry_blk->dentry[0]; - de->name_len = cpu_to_le16(1); - de->hash_code = 0; - de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); - set_de_type(de, inode); - - de = &dentry_blk->dentry[1]; - de->hash_code = 0; - de->name_len = cpu_to_le16(2); - de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); - set_de_type(de, inode); - - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + make_dentry_ptr(&d, (void *)dentry_blk, 2); + do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); From 3f2c173f410ee88ec27a5144e7030457e7747da8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 18 Oct 2014 23:41:38 -0700 Subject: [PATCH 027/321] f2fs: use kmap_atomic instead of kmap For better performance, we need to use kmap_atomic instead of kmap. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 39db1998..b84c8579 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -55,11 +55,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) /* Copy the whole inline data block */ src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); + dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); + kunmap_atomic(dst_addr); f2fs_put_page(ipage, 1); - out: SetPageUptodate(page); unlock_page(page); @@ -105,9 +104,9 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) /* Copy the whole inline data block */ src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); + dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); + kunmap_atomic(dst_addr); SetPageUptodate(page); /* write data page to try to make data consistent */ @@ -177,10 +176,10 @@ int f2fs_write_inline_data(struct inode *inode, f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); - src_addr = kmap(page); + src_addr = kmap_atomic(page); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, size); - kunmap(page); + kunmap_atomic(src_addr); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); sync_inode_page(&dn); @@ -352,7 +351,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, f2fs_wait_on_page_writeback(page, DATA); zero_user_segment(page, 0, PAGE_CACHE_SIZE); - dentry_blk = kmap(page); + dentry_blk = kmap_atomic(page); /* copy data from inline dentry block to new dentry block */ memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, @@ -362,7 +361,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, memcpy(dentry_blk->filename, inline_dentry->filename, NR_INLINE_DENTRY * F2FS_SLOT_LEN); - kunmap(page); + kunmap_atomic(dentry_blk); SetPageUptodate(page); set_page_dirty(page); From 9924622bbc490c1040c71d8f709ea4faaa731623 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Oct 2014 14:07:33 +0200 Subject: [PATCH 028/321] f2fs: avoid returning uninitialized value to userspace from f2fs_trim_fs() If user specifies too low end sector for trimming, f2fs_trim_fs() will use uninitialized value as a number of trimmed blocks and returns it to userspace. Initialize number of trimmed blocks early to avoid the problem. Coverity-id: 1248809 CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fdb0d3f6..7cbc89ac 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1079,6 +1079,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) range->len < sbi->blocksize) return -EINVAL; + cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -1090,7 +1091,6 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_start = start_segno; cpc.trim_end = end_segno; cpc.trim_minlen = range->minlen >> sbi->log_blocksize; - cpc.trimmed = 0; /* do checkpoint to issue discard commands safely */ write_checkpoint(sbi, &cpc); From 1d295d87cdd742eb7b73975bd879a457e07a044f Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:52 +0800 Subject: [PATCH 029/321] f2fs: remove the redundant function cond_clear_inode_flag Use clear_inode_flag to replace the redundant cond_clear_inode_flag. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 4 ++-- fs/f2fs/f2fs.h | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index fbae6b5e..c6aa0418 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -250,7 +250,7 @@ static int f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -261,7 +261,7 @@ static int f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return error; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c2d86e1..09d395aa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1117,15 +1117,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) set_inode_flag(fi, FI_ACL_MODE); } -static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - clear_inode_flag(fi, FI_ACL_MODE); - return 1; - } - return 0; -} - static inline void get_inline_info(struct f2fs_inode_info *fi, struct f2fs_inode *ri) { From a190250b6638ebfe3ed9fc13f7d6257f607f1b75 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:48 +0800 Subject: [PATCH 030/321] f2fs: remove the seems unneeded argument 'type' from __get_victim Remove the unneeded argument 'type' from __get_victim, use NO_CHECK_TYPE directly when calling v_ops->get_victim(). Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2a8f4acd..7151d7de 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -646,12 +646,14 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type, int type) + int gc_type) { struct sit_info *sit_i = SIT_I(sbi); int ret; + mutex_lock(&sit_i->sentry_lock); - ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, + NO_CHECK_TYPE, LFS); mutex_unlock(&sit_i->sentry_lock); return ret; } @@ -709,7 +711,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) write_checkpoint(sbi, &cpc); } - if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + if (!__get_victim(sbi, &segno, gc_type)) goto stop; ret = 0; From bf7141619f0acd336a00002fa50e070571df8b21 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:50 +0800 Subject: [PATCH 031/321] f2fs: introduce f2fs_change_bit to simplify the change bit logic Introduce f2fs_change_bit to simplify the change bit logic in function set_to_next_nat{sit}. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 9 +++++++++ fs/f2fs/node.h | 5 +---- fs/f2fs/segment.h | 5 +---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 09d395aa..4015521a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1073,6 +1073,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) return ret; } +static inline void f2fs_change_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr ^= mask; +} + /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8d5e6e0d..acb71e50 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -192,10 +192,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) { unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - f2fs_clear_bit(block_off, nm_i->nat_bitmap); - else - f2fs_set_bit(block_off, nm_i->nat_bitmap); + f2fs_change_bit(block_off, nm_i->nat_bitmap); } static inline void fill_node_footer(struct page *page, nid_t nid, diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2495bec1..6723ccc1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -657,10 +657,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { unsigned int block_off = SIT_BLOCK_OFFSET(start); - if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) - f2fs_clear_bit(block_off, sit_i->sit_bitmap); - else - f2fs_set_bit(block_off, sit_i->sit_bitmap); + f2fs_change_bit(block_off, sit_i->sit_bitmap); } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) From 529da1eb0cfa11d93123925b3932f7c3b1fc6c62 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:51 +0800 Subject: [PATCH 032/321] f2fs: rename f2fs_set/clear_bit to f2fs_test_and_set/clear_bit Rename f2fs_set/clear_bit to f2fs_test_and_set/clear_bit, which mean set/clear bit and return the old value, for better readability. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/segment.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4015521a..2ef1924f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1049,7 +1049,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) return mask & *addr; } -static inline int f2fs_set_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1061,7 +1061,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr) return ret; } -static inline int f2fs_clear_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr) { int mask; int ret; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7cbc89ac..cf6afd12 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -695,10 +695,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } else { - if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) From 544a14265e2e5a8e7ee6f6c5db7529db4ed10606 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:53 +0800 Subject: [PATCH 033/321] f2fs: set raw_super default to NULL to avoid compile warning Set raw_super default to NULL to avoid the possibly used uninitialized warning, though we may never hit it in fact. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 73993a9e..6c5fc760 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -930,7 +930,7 @@ static int read_raw_super_block(struct super_block *sb, static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; - struct f2fs_super_block *raw_super; + struct f2fs_super_block *raw_super = NULL; struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; From e810873bf6bbdab9a3f217b16466beeeab13beb5 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:49 +0800 Subject: [PATCH 034/321] f2fs: use current_sit_addr to replace the open code Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cf6afd12..3c410203 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1599,17 +1599,7 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return get_meta_page(sbi, blk_addr); + return get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, From da957ad0b52647188a6653841d11f96894a75cb7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 22 Oct 2014 15:21:47 +0200 Subject: [PATCH 035/321] f2fs: fix possible data corruption in f2fs_write_begin() f2fs_write_begin() doesn't initialize the 'dn' variable if the inode has inline data. However it uses its contents to decide whether it should just zero out the page or load data to it. Thus if we are unlucky we can zero out page contents instead of loading inline data into a page. CC: stable@vger.kernel.org CC: Changman Lee Signed-off-by: Jan Kara Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 71a29744..0a504802 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1017,21 +1017,19 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, goto out; } - if (dn.data_blkaddr == NEW_ADDR) { + if (f2fs_has_inline_data(inode)) { + err = f2fs_read_inline_data(inode, page); + if (err) { + page_cache_release(page); + goto fail; + } + } else if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); - if (err) - goto fail; - } + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + READ_SYNC); + if (err) + goto fail; lock_page(page); if (unlikely(!PageUptodate(page))) { From 4dd9ac01a3afe0249b2eada2bb1ad5c8be36aaaf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Oct 2014 14:28:39 -0700 Subject: [PATCH 036/321] f2fs: use mark_page_accessed for old kernel version The mark_page_accessed was removed by recent kernel version, so we shoud use this for old kernels. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + fs/f2fs/node.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ca514d59..ff377bc3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -69,6 +69,7 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) goto repeat; } out: + mark_page_accessed(page); return page; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e73ce18d..f89b73a5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1042,6 +1042,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) goto repeat; } got_it: + mark_page_accessed(page); return page; } @@ -1096,6 +1097,7 @@ struct page *get_node_page_ra(struct page *parent, int start) f2fs_put_page(page, 1); return ERR_PTR(-EIO); } + mark_page_accessed(page); return page; } From 0851adea72901617c3532ca71f3add4896d1312d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Oct 2014 11:04:35 -0700 Subject: [PATCH 037/321] f2fs: call write_checkpoint under disabled gc During the write_checkpoint, we should avoid f2fs_gc trigger to avoid any filesystem consistency. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3c410203..f5b63a59 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1093,7 +1093,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_minlen = range->minlen >> sbi->log_blocksize; /* do checkpoint to issue discard commands safely */ + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); out: range->len = cpc.trimmed << sbi->log_blocksize; return 0; From d3554ab79170837420204d151ea28fbf8a3b97b6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 26 Oct 2014 22:59:27 -0700 Subject: [PATCH 038/321] f2fs: flush_dcache_page for inline data When reading inline data, we should call flush_dcache_page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index b84c8579..9752838e 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -57,6 +57,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) src_addr = inline_data_addr(ipage); dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + flush_dcache_page(page); kunmap_atomic(dst_addr); f2fs_put_page(ipage, 1); out: From 5b037bf0901e2fd1d4b40651e4acfee2a8e6cd02 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Oct 2014 13:54:27 -0700 Subject: [PATCH 039/321] f2fs: do not discard data protected by the previous checkpoint We should not discard any data protected by the previous checkpoint all the time. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f5b63a59..d5dff748 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -564,7 +564,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + dmap[i] = ~(cur_map[i] | ckpt_map[i]); while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); From 319b58c37bdfe74268662a34c8a4db03e68efc73 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Oct 2014 19:48:09 -0700 Subject: [PATCH 040/321] f2fs: revisit inline_data to avoid data races and potential bugs This patch simplifies the inline_data usage with the following rule. 1. inline_data is set during the file creation. 2. If new data is requested to be written ranges out of inline_data, f2fs converts that inode permanently. 3. There is no cases which converts non-inline_data inode to inline_data. 4. The inline_data flag should be changed under inode page lock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 89 +++++++++--------- fs/f2fs/f2fs.h | 24 ++++- fs/f2fs/file.c | 105 ++++++++++------------ fs/f2fs/inline.c | 194 +++++++++++++++++++--------------------- fs/f2fs/inode.c | 1 + fs/f2fs/namei.c | 3 + include/linux/f2fs_fs.h | 1 + 7 files changed, 210 insertions(+), 207 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0a504802..8224a473 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -737,14 +737,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - int ret; + int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); - else + if (ret == -EAGAIN) ret = mpage_readpage(page, get_data_block); return ret; @@ -856,10 +856,11 @@ static int f2fs_write_data_page(struct page *page, else if (has_not_enough_free_secs(sbi, 0)) goto redirty_out; + err = -EAGAIN; f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) - err = f2fs_write_inline_data(inode, page, offset); - else + if (f2fs_has_inline_data(inode)) + err = f2fs_write_inline_data(inode, page); + if (err == -EAGAIN) err = do_write_data_page(page, &fio); f2fs_unlock_op(sbi); done: @@ -957,24 +958,14 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_balance_fs(sbi); repeat: - err = f2fs_convert_inline_data(inode, pos + len, NULL); - if (err) - goto fail; - page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { err = -ENOMEM; goto fail; } - /* to avoid latency during memory pressure */ - unlock_page(page); - *pagep = page; - if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) - goto inline_data; - f2fs_lock_op(sbi); /* check inline_data */ @@ -982,32 +973,42 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (IS_ERR(ipage)) goto unlock_fail; + set_new_dnode(&dn, inode, ipage, ipage, 0); + if (f2fs_has_inline_data(inode)) { - f2fs_put_page(ipage, 1); - f2fs_unlock_op(sbi); - f2fs_put_page(page, 0); - goto repeat; + if (pos + len <= MAX_INLINE_DATA) { + read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); + goto put_next; + } else if (page->index == 0) { + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto unlock_fail; + } else { + struct page *p = grab_cache_page(inode->i_mapping, 0); + if (!p) { + err = -ENOMEM; + goto unlock_fail; + } + err = f2fs_convert_inline_page(&dn, p); + f2fs_put_page(p, 1); + if (err) + goto unlock_fail; + } } - - set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, index); if (err) goto unlock_fail; +put_next: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); -inline_data: - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); - goto repeat; - } - - f2fs_wait_on_page_writeback(page, DATA); - if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; + f2fs_wait_on_page_writeback(page, DATA); + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { unsigned start = pos & (PAGE_CACHE_SIZE - 1); unsigned end = start + len; @@ -1017,13 +1018,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, goto out; } - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else if (dn.data_blkaddr == NEW_ADDR) { + if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, @@ -1049,7 +1044,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, unlock_fail: f2fs_unlock_op(sbi); - f2fs_put_page(page, 0); + f2fs_put_page(page, 1); fail: f2fs_write_failed(mapping, pos + len); return err; @@ -1104,9 +1099,12 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_length(iov, nr_segs); int err; - /* Let buffer I/O handle the inline data case. */ - if (f2fs_has_inline_data(inode)) - return 0; + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } if (check_direct_IO(inode, rw, iov, offset, nr_segs)) return 0; @@ -1172,9 +1170,12 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - if (f2fs_has_inline_data(inode)) - return 0; - + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } return generic_block_bmap(mapping, block, get_data_block); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2ef1924f..8889ab6d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1101,6 +1101,7 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_DATA_EXIST, /* indicate data exists */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1135,6 +1136,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_DATA); if (ri->i_inline & F2FS_INLINE_DENTRY) set_inode_flag(fi, FI_INLINE_DENTRY); + if (ri->i_inline & F2FS_DATA_EXIST) + set_inode_flag(fi, FI_DATA_EXIST); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1148,6 +1151,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_DATA; if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) ri->i_inline |= F2FS_INLINE_DENTRY; + if (is_inode_flag_set(fi, FI_DATA_EXIST)) + ri->i_inline |= F2FS_DATA_EXIST; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1182,6 +1187,17 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); } +static inline void f2fs_clear_inline_inode(struct inode *inode) +{ + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST); +} + +static inline int f2fs_exist_data(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1590,10 +1606,12 @@ extern const struct inode_operations f2fs_special_inode_operations; * inline.c */ bool f2fs_may_inline(struct inode *); +void read_inline_data(struct page *, struct page *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); -int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); -void truncate_inline_data(struct inode *, u64); +int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); +int f2fs_convert_inline_inode(struct inode *); +int f2fs_write_inline_data(struct inode *, struct page *); +void truncate_inline_data(struct page *, u64); bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, struct page **, unsigned int); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 658b02ea..5c065e72 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,35 +35,22 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - struct page *ipage; int err; f2fs_balance_fs(sbi); sb_start_pagefault(inode->i_sb); -retry: - /* force to convert with normal data indices */ - err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); - if (err) - goto out; - - /* block allocation */ - f2fs_lock_op(sbi); - - /* check inline_data */ - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - f2fs_unlock_op(sbi); - goto out; - } + /* we don't need to use inline_data strictly */ if (f2fs_has_inline_data(inode)) { - f2fs_put_page(ipage, 1); - f2fs_unlock_op(sbi); - goto retry; + err = f2fs_convert_inline_inode(inode); + if (err) + goto out; } - set_new_dnode(&dn, inode, ipage, NULL, 0); + /* block allocation */ + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, page->index); if (err) { f2fs_unlock_op(sbi); @@ -450,20 +437,17 @@ void truncate_data_blocks(struct dnode_of_data *dn) truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } -static void truncate_partial_data_page(struct inode *inode, u64 from) +static int truncate_partial_data_page(struct inode *inode, u64 from) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; - if (f2fs_has_inline_data(inode)) - return truncate_inline_data(inode, from); - if (!offset) - return; + return 0; page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); if (IS_ERR(page)) - return; + return 0; lock_page(page); if (unlikely(!PageUptodate(page) || @@ -473,9 +457,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); set_page_dirty(page); - out: f2fs_put_page(page, 1); + return 0; } int truncate_blocks(struct inode *inode, u64 from, bool lock) @@ -485,33 +469,35 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; + struct page *ipage; trace_f2fs_truncate_blocks_enter(inode, from); - if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) - goto done; - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); + ((from + blocksize - 1) >> (sbi->log_blocksize)); if (lock) f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_inline_data(inode)) { + truncate_inline_data(ipage, from); + update_inode(inode, ipage); + f2fs_put_page(ipage, 1); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - if (lock) - f2fs_unlock_op(sbi); - trace_f2fs_truncate_blocks_exit(inode, err); - return err; - } - - /* writepage can convert inline_data under get_donde_of_data */ - if (f2fs_has_inline_data(inode)) { - f2fs_put_dnode(&dn); - goto unlock_done; + goto out; } count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -527,12 +513,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); -unlock_done: + + /* lastly zero out the first data page */ + if (!err) + err = truncate_partial_data_page(inode, from); +out: if (lock) f2fs_unlock_op(sbi); -done: - /* lastly zero out the first data page */ - truncate_partial_data_page(inode, from); trace_f2fs_truncate_blocks_exit(inode, err); return err; @@ -603,10 +590,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { - err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); - if (err) - return err; - if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); @@ -706,9 +689,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (offset >= inode->i_size) return ret; - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -762,9 +747,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -913,9 +900,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_balance_fs(sbi); + f2fs_clear_inline_inode(inode); + stat_dec_inline_inode(inode); set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); + return f2fs_convert_inline_inode(inode); } static int f2fs_ioc_commit_atomic_write(struct file *filp) @@ -948,6 +937,8 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + f2fs_clear_inline_inode(inode); + stat_dec_inline_inode(inode); set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); return 0; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 9752838e..bfb94e46 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -15,41 +15,26 @@ bool f2fs_may_inline(struct inode *inode) { - block_t nr_blocks; - loff_t i_size; - if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) return false; if (f2fs_is_atomic_file(inode)) return false; - nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; - if (inode->i_blocks > nr_blocks) - return false; - - i_size = i_size_read(inode); - if (i_size > MAX_INLINE_DATA) + if (!S_ISREG(inode->i_mode)) return false; return true; } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +void read_inline_data(struct page *page, struct page *ipage) { - struct page *ipage; void *src_addr, *dst_addr; - if (page->index) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - goto out; - } + if (PageUptodate(page)) + return; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) { - unlock_page(page); - return PTR_ERR(ipage); - } + f2fs_bug_on(F2FS_P_SB(page), page->index); zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); @@ -59,104 +44,120 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) memcpy(dst_addr, src_addr, MAX_INLINE_DATA); flush_dcache_page(page); kunmap_atomic(dst_addr); - f2fs_put_page(ipage, 1); -out: SetPageUptodate(page); - unlock_page(page); +} +int f2fs_read_inline_data(struct inode *inode, struct page *page) +{ + struct page *ipage; + + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ipage)) { + unlock_page(page); + return PTR_ERR(ipage); + } + + if (!f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + return -EAGAIN; + } + + if (page->index) + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + else + read_inline_data(page, ipage); + + SetPageUptodate(page); + f2fs_put_page(ipage, 1); + unlock_page(page); return 0; } -static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) +int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { - int err = 0; - struct page *ipage; - struct dnode_of_data dn; void *src_addr, *dst_addr; block_t new_blk_addr; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; + int err; - f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto out; - } + f2fs_bug_on(F2FS_I_SB(dn->inode), page->index); - /* someone else converted inline_data already */ - if (!f2fs_has_inline_data(inode)) - goto out; + if (!f2fs_exist_data(dn->inode)) + goto clear_out; - /* - * i_addr[0] is not used for inline data, - * so reserving new block will not destroy inline data - */ - set_new_dnode(&dn, inode, ipage, NULL, 0); - err = f2fs_reserve_block(&dn, 0); + err = f2fs_reserve_block(dn, 0); if (err) - goto out; + return err; f2fs_wait_on_page_writeback(page, DATA); + + if (PageUptodate(page)) + goto no_update; + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); + src_addr = inline_data_addr(dn->inode_page); dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(dst_addr); SetPageUptodate(page); - +no_update: /* write data page to try to make data consistent */ set_page_writeback(page); - write_data_page(page, &dn, &new_blk_addr, &fio); - update_extent_cache(new_blk_addr, &dn); + + write_data_page(page, dn, &new_blk_addr, &fio); + update_extent_cache(new_blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); /* clear inline data and flag after data writeback */ - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_dec_inline_inode(inode); - - sync_inode_page(&dn); - f2fs_put_dnode(&dn); -out: - f2fs_unlock_op(sbi); - return err; + truncate_inline_data(dn->inode_page, 0); +clear_out: + f2fs_clear_inline_inode(dn->inode); + stat_dec_inline_inode(dn->inode); + sync_inode_page(dn); + f2fs_put_dnode(dn); + return 0; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, - struct page *page) +int f2fs_convert_inline_inode(struct inode *inode) { - struct page *new_page = page; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + struct page *ipage, *page; + int err = 0; - if (!f2fs_has_inline_data(inode)) - return 0; - else if (to_size <= MAX_INLINE_DATA) - return 0; + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; + + f2fs_lock_op(sbi); - if (!page || page->index != 0) { - new_page = grab_cache_page(inode->i_mapping, 0); - if (!new_page) - return -ENOMEM; + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + f2fs_unlock_op(sbi); + return PTR_ERR(ipage); } - err = __f2fs_convert_inline_data(inode, new_page); - if (!page || page->index != 0) - f2fs_put_page(new_page, 1); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) + err = f2fs_convert_inline_page(&dn, page); + + f2fs_put_dnode(&dn); + + f2fs_unlock_op(sbi); + + f2fs_put_page(page, 1); return err; } -int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) +int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; - struct page *ipage; struct dnode_of_data dn; int err; @@ -164,48 +165,37 @@ int f2fs_write_inline_data(struct inode *inode, err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; - ipage = dn.inode_page; - /* Release any data block if it is allocated */ if (!f2fs_has_inline_data(inode)) { - int count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); - truncate_data_blocks_range(&dn, count); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_inc_inline_inode(inode); + f2fs_put_dnode(&dn); + return -EAGAIN; } - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); + f2fs_bug_on(F2FS_I_SB(inode), page->index); + + f2fs_wait_on_page_writeback(dn.inode_page, NODE); src_addr = kmap_atomic(page); - dst_addr = inline_data_addr(ipage); - memcpy(dst_addr, src_addr, size); + dst_addr = inline_data_addr(dn.inode_page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(src_addr); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); sync_inode_page(&dn); f2fs_put_dnode(&dn); - return 0; } -void truncate_inline_data(struct inode *inode, u64 from) +void truncate_inline_data(struct page *ipage, u64 from) { - struct page *ipage; + void *addr; if (from >= MAX_INLINE_DATA) return; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET + from, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + addr = inline_data_addr(ipage); + memset(addr + from, 0, MAX_INLINE_DATA - from); } bool recover_inline_data(struct inode *inode, struct page *npage) @@ -245,9 +235,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage) if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); + truncate_inline_data(ipage, 0); clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); update_inode(inode, ipage); f2fs_put_page(ipage, 1); @@ -367,8 +355,8 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); + truncate_inline_data(ipage, 0); + stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 94470b65..ae14330f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -328,6 +328,7 @@ void handle_failed_inode(struct inode *inode) remove_inode_page(inode); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6b277d7a..d9e3ff0a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -56,6 +56,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto out; } + if (f2fs_may_inline(inode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); @@ -134,6 +136,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); + stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); return 0; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 63f8303b..cc1064f3 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -172,6 +172,7 @@ struct f2fs_extent { #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ +#define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) From c3d0b6f08288617e232433c5b3eb65f8744098ed Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 Oct 2014 22:27:59 -0700 Subject: [PATCH 041/321] f2fs: send discard commands in larger extent If there is a chance to make a huge sized discard command, we don't need to split it out, since each blkdev_issue_discard should wait one at a time. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 56 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d5dff748..50ea31bf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -75,6 +75,18 @@ struct llist_node *llist_reverse_order(struct llist_node *head) return new_head; } +/** + * Copied from latest linux/list.h + * list_last_entry - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + */ +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + /* * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since * MSB and LSB are reversed in a byte by f2fs_set_bit. @@ -516,10 +528,33 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static void __add_discard_entry(struct f2fs_sb_info *sbi, + struct cp_control *cpc, unsigned int start, unsigned int end) { struct list_head *head = &SM_I(sbi)->discard_list; - struct discard_entry *new; + struct discard_entry *new, *last; + + if (!list_empty(head)) { + last = list_last_entry(head, struct discard_entry, list); + if (START_BLOCK(sbi, cpc->trim_start) + start == + last->blkaddr + last->len) { + last->len += end - start; + goto done; + } + } + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; + new->len = end - start; + list_add_tail(&new->list, head); +done: + SM_I(sbi)->nr_discards += end - start; + cpc->trimmed += end - start; +} + +static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); @@ -548,13 +583,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) } mutex_unlock(&dirty_i->seglist_lock); - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start); - new->len = sbi->blocks_per_seg; - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += sbi->blocks_per_seg; - cpc->trimmed += sbi->blocks_per_seg; + __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg); return; } @@ -576,14 +605,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (end - start < cpc->trim_minlen) continue; - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - cpc->trimmed += end - start; - - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += end - start; + __add_discard_entry(sbi, cpc, start, end); } } From 77d6271f3aa83660e758ac3cc78ad01c9811ae41 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Oct 2014 14:37:22 -0700 Subject: [PATCH 042/321] - f2fs: avoid race condition in handling wait_io __submit_merged_bio f2fs_write_end_io f2fs_write_end_io wait_io = X wait_io = x complete(X) complete(X) wait_io = NULL wait_for_completion() free(X) spin_lock(X) kernel panic In order to avoid this, this patch removes the wait_io facility. Instead, we can use wait_on_all_pages_writeback(sbi) to wait for end_ios. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/data.c | 35 +++++++---------------------------- fs/f2fs/f2fs.h | 1 - 3 files changed, 10 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ff377bc3..a8747a87 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -979,6 +979,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + /* wait for previous submitted meta pages writeback */ + wait_on_all_pages_writeback(sbi); + release_dirty_inode(sbi); if (unlikely(f2fs_cp_error(sbi))) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8224a473..1683aa58 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -61,11 +61,6 @@ static void f2fs_write_end_io(struct bio *bio, int err) dec_page_count(sbi, F2FS_WRITEBACK); } - if (sbi->wait_io) { - complete(sbi->wait_io); - sbi->wait_io = NULL; - } - if (!get_pages(sbi, F2FS_WRITEBACK) && !list_empty(&sbi->cp_wait.task_list)) wake_up(&sbi->cp_wait); @@ -95,34 +90,18 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; - int rw; if (!io->bio) return; - rw = fio->rw; - - if (is_read_io(rw)) { - trace_f2fs_submit_read_bio(io->sbi->sb, rw, - fio->type, io->bio); - submit_bio(rw, io->bio); - } else { - trace_f2fs_submit_write_bio(io->sbi->sb, rw, - fio->type, io->bio); - /* - * META_FLUSH is only from the checkpoint procedure, and we - * should wait this metadata bio for FS consistency. - */ - if (fio->type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - io->sbi->wait_io = &wait; - submit_bio(rw, io->bio); - wait_for_completion(&wait); - } else { - submit_bio(rw, io->bio); - } - } + if (is_read_io(fio->rw)) + trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + else + trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + submit_bio(fio->rw, io->bio); io->bio = NULL; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8889ab6d..2b64d8ee 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -516,7 +516,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ - struct completion *wait_io; /* for completion bios */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ From 083750339d7c1ae6be0df269eb499ec6399f656e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 31 Oct 2014 08:00:20 -0700 Subject: [PATCH 043/321] - f2fs: fix inline_data missing points This patch fixes some bugs in inline_data handling. o Before clearing inline_data for inmemory pages, we should convert it first. o Do convert inline_data when mmap was callled. o recovery should handle the old partition image in terms of DATA_EXIST. o recover DATA_EXIST for backward compatibility. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - fs/f2fs/file.c | 23 ++++++++++++----------- fs/f2fs/inline.c | 9 +++++++-- fs/f2fs/inode.c | 32 +++++++++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1683aa58..3ab18db0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -957,7 +957,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA) { read_inline_data(page, ipage); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); sync_inode_page(&dn); goto put_next; } else if (page->index == 0) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5c065e72..73d6343b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -41,12 +41,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, sb_start_pagefault(inode->i_sb); - /* we don't need to use inline_data strictly */ - if (f2fs_has_inline_data(inode)) { - err = f2fs_convert_inline_inode(inode); - if (err) - goto out; - } + f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); /* block allocation */ f2fs_lock_op(sbi); @@ -396,6 +391,15 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct inode *inode = file_inode(file); + + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; return 0; @@ -900,8 +904,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_balance_fs(sbi); - f2fs_clear_inline_inode(inode); - stat_dec_inline_inode(inode); set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); return f2fs_convert_inline_inode(inode); @@ -937,10 +939,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; - f2fs_clear_inline_inode(inode); - stat_dec_inline_inode(inode); set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - return 0; + + return f2fs_convert_inline_inode(inode); } static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index bfb94e46..2238b2f6 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -180,6 +180,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) kunmap_atomic(src_addr); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); f2fs_put_dnode(&dn); return 0; @@ -227,6 +229,10 @@ bool recover_inline_data(struct inode *inode, struct page *npage) src_addr = inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + update_inode(inode, ipage); f2fs_put_page(ipage, 1); return true; @@ -236,12 +242,11 @@ bool recover_inline_data(struct inode *inode, struct page *npage) ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); truncate_inline_data(ipage, 0); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { truncate_blocks(inode, 0, false); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } return false; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ae14330f..cad068d3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -66,12 +66,38 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static int __recover_inline_status(struct inode *inode, struct page *ipage) +{ + void *inline_data = inline_data_addr(ipage); + struct f2fs_inode *ri; + void *zbuf; + + zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); + if (!zbuf) + return -ENOMEM; + + if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { + kfree(zbuf); + return 0; + } + kfree(zbuf); + + f2fs_wait_on_page_writeback(ipage, NODE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + + ri = F2FS_INODE(ipage); + set_raw_inline(F2FS_I(inode), ri); + set_page_dirty(ipage); + return 0; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; + int err = 0; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -113,11 +139,15 @@ static int do_read_inode(struct inode *inode) get_extent_info(&fi->ext, ri->i_ext); get_inline_info(fi, ri); + /* check data exist */ + if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) + err = __recover_inline_status(inode, node_page); + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); f2fs_put_page(node_page, 1); - return 0; + return err; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) From bd514af94c0c0fb1bf4a0745fd649bae68689747 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 3 Nov 2014 20:19:39 -0800 Subject: [PATCH 044/321] f2fs: fix inline_data missing problem Sabyasachi Singh reported: Below commit in kernel/f2fs is causing the recorded video file corruption (file header is corrupt), if recorded for > 2-3 mins. the file seems to be either getting overridden for initial few bytes or file writing is getting ended before finalization. The issue is 100% seen for titan & sometime for victara (hard to reproduce on vicatara) latest builds or build #39 onward. (CR) - f2fs: fix inline_data missing points Change-Id: Ia68cf7c19bb659de20673f57d9adc6b675fb6f7c Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3ab18db0..1683aa58 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -957,6 +957,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA) { read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); sync_inode_page(&dn); goto put_next; } else if (page->index == 0) { From 4ddd9c307a125038d60b38804a42b542e1b5e914 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Nov 2014 15:16:04 -0800 Subject: [PATCH 045/321] f2fs: introduce the number of inode entries This patch adds to monitor the number of ino entries. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 27 +++++++++++++++------------ fs/f2fs/debug.c | 4 +++- fs/f2fs/f2fs.h | 2 +- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a8747a87..497da24a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -319,6 +319,8 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) e->ino = ino; list_add_tail(&e->list, &sbi->ino_list[type]); + if (type != ORPHAN_INO) + sbi->ino_num[type]++; } spin_unlock(&sbi->ino_lock[type]); } @@ -332,8 +334,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) if (e) { list_del(&e->list); radix_tree_delete(&sbi->ino_root[type], ino); - if (type == ORPHAN_INO) - sbi->n_orphans--; + sbi->ino_num[type]--; spin_unlock(&sbi->ino_lock[type]); kmem_cache_free(ino_entry_slab, e); return; @@ -374,6 +375,7 @@ void release_dirty_inode(struct f2fs_sb_info *sbi) list_del(&e->list); radix_tree_delete(&sbi->ino_root[i], e->ino); kmem_cache_free(ino_entry_slab, e); + sbi->ino_num[i]--; } spin_unlock(&sbi->ino_lock[i]); } @@ -384,10 +386,10 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) int err = 0; spin_lock(&sbi->ino_lock[ORPHAN_INO]); - if (unlikely(sbi->n_orphans >= sbi->max_orphans)) + if (unlikely(sbi->ino_num[ORPHAN_INO] >= sbi->max_orphans)) err = -ENOSPC; else - sbi->n_orphans++; + sbi->ino_num[ORPHAN_INO]++; spin_unlock(&sbi->ino_lock[ORPHAN_INO]); return err; @@ -396,8 +398,8 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) void release_orphan_inode(struct f2fs_sb_info *sbi) { spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi, sbi->n_orphans == 0); - sbi->n_orphans--; + f2fs_bug_on(sbi, sbi->ino_num[ORPHAN_INO] == 0); + sbi->ino_num[ORPHAN_INO]--; spin_unlock(&sbi->ino_lock[ORPHAN_INO]); } @@ -461,11 +463,12 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = - (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); + unsigned short orphan_blocks; struct page *page = NULL; struct ino_entry *orphan = NULL; + orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]); + for (index = 0; index < orphan_blocks; index++) grab_meta_page(sbi, start_blk + index); @@ -893,7 +896,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); + orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); @@ -909,7 +912,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) orphan_blocks); } - if (sbi->n_orphans) + if (sbi->ino_num[ORPHAN_INO]) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -944,7 +947,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_put_page(cp_page, 1); } - if (sbi->n_orphans) { + if (sbi->ino_num[ORPHAN_INO]) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; } @@ -1046,6 +1049,7 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); spin_lock_init(&sbi->ino_lock[i]); INIT_LIST_HEAD(&sbi->ino_list[i]); + sbi->ino_num[i] = 0; } /* @@ -1054,7 +1058,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * orphan entries with the limitation one reserved segment * for cp pack we can have max 1020*504 orphan entries */ - sbi->n_orphans = 0; sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 86e6e926..74a0d78d 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -119,6 +119,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned npages; + int i; if (si->base_mem) goto get_cache; @@ -168,8 +169,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + for (i = 0; i <= UPDATE_INO; i++) + si->cache_mem += sbi->ino_num[i] * sizeof(struct ino_entry); } static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2b64d8ee..c20ecca3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -531,9 +531,9 @@ struct f2fs_sb_info { struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ + unsigned long ino_num[MAX_INO_ENTRY]; /* number of entries */ /* for orphan inode, use 0'th array */ - unsigned int n_orphans; /* # of orphan inodes */ unsigned int max_orphans; /* max orphan inodes */ /* for directory inode management */ From 1281ee582f4d5feac70b4de0c9430517ee0ae1fb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Nov 2014 15:24:46 -0800 Subject: [PATCH 046/321] f2fs: control the memory footprint used by ino entries This patch adds to control the memory footprint used by ino entries. This will conduct best effort, not strictly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 28 ++++++++++++++++++++++------ fs/f2fs/node.h | 3 ++- fs/f2fs/segment.c | 3 ++- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f89b73a5..afd274e3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; + unsigned long avail_ram; unsigned long mem_size = 0; bool res = false; si_meminfo(&val); - /* give 25%, 25%, 50% memory for each components respectively */ + + /* only uses low memory */ + avail_ram = val.totalram - val.totalhigh; + + /* give 25%, 25%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->dirty_exceeded) return false; mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INO_ENTRIES) { + int i; + + if (sbi->sb->s_bdi->dirty_exceeded) + return false; + for (i = 0; i <= UPDATE_INO; i++) + mem_size += (sbi->ino_num[i] * sizeof(struct ino_entry)) + >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } return res; } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index acb71e50..d10b6448 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ - DIRTY_DENTS /* indicates dirty dentry pages */ + DIRTY_DENTS, /* indicates dirty dentry pages */ + INO_ENTRIES, /* indicates inode entries */ }; struct nat_entry_set { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 50ea31bf..7156ee98 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -335,7 +335,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || - excess_prefree_segs(sbi)) + excess_prefree_segs(sbi) || + available_free_memory(sbi, INO_ENTRIES)) f2fs_sync_fs(sbi->sb, true); } From 1d95e2404828f5fb502fff18d1a8332a1154da7d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Nov 2014 17:21:24 -0800 Subject: [PATCH 047/321] f2fs: write node pages if checkpoint is not doing It needs to write node pages if checkpoint is not doing in order to avoid memory pressure. Reviewed-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index afd274e3..f226b7e0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1316,10 +1316,12 @@ static int f2fs_write_node_page(struct page *page, return 0; } - if (wbc->for_reclaim) - goto redirty_out; - - down_read(&sbi->node_write); + if (wbc->for_reclaim) { + if (!down_read_trylock(&sbi->node_write)) + goto redirty_out; + } else { + down_read(&sbi->node_write); + } set_page_writeback(page); write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); From 1ae71fa2c7814974d84de9bcc64a34458d7bbcb7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Nov 2014 17:23:08 -0800 Subject: [PATCH 048/321] f2fs: do not skip any writes under memory pressure Under memory pressure, let's avoid skipping data writes. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 6723ccc1..7f327c0b 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -711,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) */ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { + if (sbi->sb->s_bdi->dirty_exceeded) + return 0; + if (type == DATA) return sbi->blocks_per_seg; else if (type == NODE) From 2c81c521c306ca5682b3e55ffe8aaac4eb012478 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 10 Nov 2014 16:29:14 -0800 Subject: [PATCH 049/321] f2fs: reduce the number of inline_data inode before clearing it The # of inline_data inode is decreased only when it has inline_data. After clearing the flag, we can't decreased the number. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2238b2f6..7e8ee0dc 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -116,8 +116,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* clear inline data and flag after data writeback */ truncate_inline_data(dn->inode_page, 0); clear_out: - f2fs_clear_inline_inode(dn->inode); stat_dec_inline_inode(dn->inode); + f2fs_clear_inline_inode(dn->inode); sync_inode_page(dn); f2fs_put_dnode(dn); return 0; From 129a013312284136f8916d5514325be9150e66a1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 Nov 2014 11:01:01 -0800 Subject: [PATCH 050/321] f2fs: fix deadlock to grab 0'th data page The scenario is like this. One trhead triggers: f2fs_write_data_pages lock_page f2fs_write_data_page f2fs_lock_op <- wait The other thread triggers: f2fs_truncate truncate_blocks f2fs_lock_op truncate_partial_data_page lock_page <- wait for locking the page This patch resolves this bug by relocating truncate_partial_data_page. This function is just to truncate user data page and not related to FS consistency as well. And, we don't need to call truncate_inline_data. Rather than that, f2fs_write_data_page will finally update inline_data later. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 73d6343b..a62e75ad 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -490,8 +490,6 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } if (f2fs_has_inline_data(inode)) { - truncate_inline_data(ipage, from); - update_inode(inode, ipage); f2fs_put_page(ipage, 1); goto out; } @@ -517,13 +515,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +out: + if (lock) + f2fs_unlock_op(sbi); /* lastly zero out the first data page */ if (!err) err = truncate_partial_data_page(inode, from); -out: - if (lock) - f2fs_unlock_op(sbi); trace_f2fs_truncate_blocks_exit(inode, err); return err; From 7809c0c0155260c22a7cb24f7889a2a32343494e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 Nov 2014 14:10:01 -0800 Subject: [PATCH 051/321] f2fs: convert inline_data when i_size becomes large If i_size becomes large outside of MAX_INLINE_DATA, we shoud convert the inode. Otherwise, we can make some dirty pages during the truncation, and those pages will be written through f2fs_write_data_page. At that moment, the inode has still inline_data, so that it tries to write non- zero pages into inline_data area. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++++++ fs/f2fs/inline.c | 3 +++ 2 files changed, 9 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a62e75ad..3a56162c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -535,6 +535,12 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); + /* we should check inline_data size */ + if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) { + if (f2fs_convert_inline_inode(inode)) + return; + } + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7e8ee0dc..74edc602 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -24,6 +24,9 @@ bool f2fs_may_inline(struct inode *inode) if (!S_ISREG(inode->i_mode)) return false; + if (i_size_read(inode) > MAX_INLINE_DATA) + return false; + return true; } From 235dbd24cc61e63e1e4c0664054e7a108918905f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Nov 2014 16:06:55 -0800 Subject: [PATCH 052/321] - f2fs: fix to call put_page at the error handling routine The locked page should be released before returning the function. Change-Id: Idc520a8d85fde2710a79c6efc7875845e9b5a2d1 Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 74edc602..8fe6d180 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -141,8 +141,8 @@ int f2fs_convert_inline_inode(struct inode *inode) ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { - f2fs_unlock_op(sbi); - return PTR_ERR(ipage); + err = PTR_ERR(ipage); + goto out; } set_new_dnode(&dn, inode, ipage, ipage, 0); @@ -151,7 +151,7 @@ int f2fs_convert_inline_inode(struct inode *inode) err = f2fs_convert_inline_page(&dn, page); f2fs_put_dnode(&dn); - +out: f2fs_unlock_op(sbi); f2fs_put_page(page, 1); From 915977bd80cc0457c272dc74f414dcf47b855119 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Nov 2014 16:14:11 -0800 Subject: [PATCH 053/321] - f2fs: put the inode page when error was occurred We should put the inode page after error was occurred. Change-Id: Idc8b0c898a9a986396fd29522ec5ec1be748fd62 Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1683aa58..9f0bf075 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -963,22 +963,22 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, } else if (page->index == 0) { err = f2fs_convert_inline_page(&dn, page); if (err) - goto unlock_fail; + goto put_fail; } else { struct page *p = grab_cache_page(inode->i_mapping, 0); if (!p) { err = -ENOMEM; - goto unlock_fail; + goto put_fail; } err = f2fs_convert_inline_page(&dn, p); f2fs_put_page(p, 1); if (err) - goto unlock_fail; + goto put_fail; } } err = f2fs_reserve_block(&dn, index); if (err) - goto unlock_fail; + goto put_fail; put_next: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); @@ -1021,6 +1021,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, clear_cold_data(page); return 0; +put_fail: + f2fs_put_dnode(&dn); unlock_fail: f2fs_unlock_op(sbi); f2fs_put_page(page, 1); From ef3a1bb03630a700723d2ba05baa1bed3c357b03 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Nov 2014 11:16:01 +0800 Subject: [PATCH 054/321] f2fs: avoid unable to restart gc thread in remount In f2fs_remount, we will stop gc thread and set need_restart_gc as true when new option is set without BG_GC, then if any error occurred in the following procedure, we can restore to start the gc thread. But after that, We will fail to restore gc thread in start_gc_thread as BG_GC is not set in new option, so we'd better move this condition judgment out of start_gc_thread to fix this issue. Change-Id: Ieb0a493dc8cb6e7cbfdd99cb2e54960797a3fb7f Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 -- fs/f2fs/super.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7151d7de..ccf1bab1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -96,8 +96,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; int err = 0; - if (!test_opt(sbi, BG_GC)) - goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); if (!gc_th) { err = -ENOMEM; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6c5fc760..35806bab 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1131,7 +1131,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (!f2fs_readonly(sb)) { + if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) From 7e38a9b508a121485eb468b85f88e4dd8fe63224 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Nov 2014 11:17:20 +0800 Subject: [PATCH 055/321] f2fs: remove unneeded check code with option in f2fs_remount Because we have checked the contrary condition in case of "if" judgment, we do not need to check the condition again in case of "else" judgment. Let's remove it. Change-Id: Ie11115925fad120a154413221bf653e93dc45a9d Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 35806bab..7e23f005 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -662,7 +662,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_sync_fs(sb, 1); need_restart_gc = true; } - } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + } else if (!sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; @@ -675,7 +675,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { destroy_flush_cmd_control(sbi); - } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) { + } else if (!SM_I(sbi)->cmd_control_info) { err = create_flush_cmd_control(sbi); if (err) goto restore_gc; From 95a92588b93e743a06451b2efeab46e7f7357c48 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Nov 2014 11:18:36 +0800 Subject: [PATCH 056/321] f2fs: introduce struct inode_management to wrap inner fields Now in f2fs, we have three inode cache: ORPHAN_INO, APPEND_INO, UPDATE_INO, and we manage fields related to inode cache separately in struct f2fs_sb_info for each inode cache type. This makes codes a bit messy, so that this patch intorduce a new struct inode_management to wrap inner fields as following which make codes more neat. /* for inner inode cache management */ struct inode_management { struct radix_tree_root ino_root; /* ino entry array */ spinlock_t ino_lock; /* for ino entry lock */ struct list_head ino_list; /* inode list head */ unsigned long ino_num; /* number of entries */ }; struct f2fs_sb_info { ... struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ ... } Change-Id: Iae55c9ffc6fce6c395dd1a2453ae83580184d014 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 95 +++++++++++++++++++++++++------------------- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 14 ++++--- fs/f2fs/node.c | 4 +- 4 files changed, 66 insertions(+), 49 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 497da24a..f2499ad1 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -299,47 +299,49 @@ const struct address_space_operations f2fs_meta_aops = { static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; retry: - spin_lock(&sbi->ino_lock[type]); + spin_lock(&im->ino_lock); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); if (!e) { - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); goto retry; } - if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { - spin_unlock(&sbi->ino_lock[type]); + if (radix_tree_insert(&im->ino_root, ino, e)) { + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); goto retry; } memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; - list_add_tail(&e->list, &sbi->ino_list[type]); + list_add_tail(&e->list, &im->ino_list); if (type != ORPHAN_INO) - sbi->ino_num[type]++; + im->ino_num++; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); } static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[type]); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); if (e) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[type], ino); - sbi->ino_num[type]--; - spin_unlock(&sbi->ino_lock[type]); + radix_tree_delete(&im->ino_root, ino); + im->ino_num--; + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); return; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); } void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -357,10 +359,12 @@ void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) /* mode should be APPEND_INO or UPDATE_INO */ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { + struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[mode]); - e = radix_tree_lookup(&sbi->ino_root[mode], ino); - spin_unlock(&sbi->ino_lock[mode]); + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + spin_unlock(&im->ino_lock); return e ? true : false; } @@ -370,37 +374,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi) int i; for (i = APPEND_INO; i <= UPDATE_INO; i++) { - spin_lock(&sbi->ino_lock[i]); - list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { + struct inode_management *im = &sbi->im[i]; + + spin_lock(&im->ino_lock); + list_for_each_entry_safe(e, tmp, &im->ino_list, list) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[i], e->ino); + radix_tree_delete(&im->ino_root, e->ino); kmem_cache_free(ino_entry_slab, e); - sbi->ino_num[i]--; + im->ino_num--; } - spin_unlock(&sbi->ino_lock[i]); + spin_unlock(&im->ino_lock); } } int acquire_orphan_inode(struct f2fs_sb_info *sbi) { + struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - if (unlikely(sbi->ino_num[ORPHAN_INO] >= sbi->max_orphans)) + spin_lock(&im->ino_lock); + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else - sbi->ino_num[ORPHAN_INO]++; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + im->ino_num++; + spin_unlock(&im->ino_lock); return err; } void release_orphan_inode(struct f2fs_sb_info *sbi) { - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi, sbi->ino_num[ORPHAN_INO] == 0); - sbi->ino_num[ORPHAN_INO]--; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + spin_lock(&im->ino_lock); + f2fs_bug_on(sbi, im->ino_num == 0); + im->ino_num--; + spin_unlock(&im->ino_lock); } void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -466,15 +475,16 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) unsigned short orphan_blocks; struct page *page = NULL; struct ino_entry *orphan = NULL; + struct inode_management *im = &sbi->im[ORPHAN_INO]; - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]); + orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); for (index = 0; index < orphan_blocks; index++) grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - head = &sbi->ino_list[ORPHAN_INO]; + spin_lock(&im->ino_lock); + head = &im->ino_list; /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { @@ -514,7 +524,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) f2fs_put_page(page, 1); } - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + spin_unlock(&im->ino_lock); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -837,6 +847,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; struct page *cp_page; @@ -896,7 +907,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]); + orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); @@ -912,7 +923,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) orphan_blocks); } - if (sbi->ino_num[ORPHAN_INO]) + if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -947,7 +958,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_put_page(cp_page, 1); } - if (sbi->ino_num[ORPHAN_INO]) { + if (orphan_num) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; } @@ -1046,10 +1057,12 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) int i; for (i = 0; i < MAX_INO_ENTRY; i++) { - INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); - spin_lock_init(&sbi->ino_lock[i]); - INIT_LIST_HEAD(&sbi->ino_list[i]); - sbi->ino_num[i] = 0; + struct inode_management *im = &sbi->im[i]; + + INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC); + spin_lock_init(&im->ino_lock); + INIT_LIST_HEAD(&im->ino_list); + im->ino_num = 0; } /* diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 74a0d78d..40b679ce 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -171,7 +171,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->cache_mem += npages << PAGE_CACHE_SHIFT; si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); for (i = 0; i <= UPDATE_INO; i++) - si->cache_mem += sbi->ino_num[i] * sizeof(struct ino_entry); + si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); } static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c20ecca3..a3afb57e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -498,6 +498,14 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ }; +/* for inner inode cache management */ +struct inode_management { + struct radix_tree_root ino_root; /* ino entry array */ + spinlock_t ino_lock; /* for ino entry lock */ + struct list_head ino_list; /* inode list head */ + unsigned long ino_num; /* number of entries */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -527,11 +535,7 @@ struct f2fs_sb_info { bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; - /* for inode management */ - struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ - spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ - struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ - unsigned long ino_num[MAX_INO_ENTRY]; /* number of entries */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ /* for orphan inode, use 0'th array */ unsigned int max_orphans; /* max orphan inodes */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f226b7e0..35a3ecde 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -60,8 +60,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) if (sbi->sb->s_bdi->dirty_exceeded) return false; for (i = 0; i <= UPDATE_INO; i++) - mem_size += (sbi->ino_num[i] * sizeof(struct ino_entry)) - >> PAGE_CACHE_SHIFT; + mem_size += (sbi->im[i].ino_num * + sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } return res; From 1744991199e057c59bec43a2c308a834b658619b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Nov 2014 10:54:48 -0800 Subject: [PATCH 057/321] - f2fs: submit bio for node blocks in the reclaim path If a node page is request to be written during the reclaiming path, we should submit the bio to avoid pending to reclaim it. Change-Id: I8bc167157a229206059e017f63e4d2d77a68deac Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 35a3ecde..e45a7bff 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1328,6 +1328,10 @@ static int f2fs_write_node_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, NODE, WRITE); + return 0; redirty_out: From a44cc51877b4fc28e06e41d0239a2db929262e84 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Nov 2014 11:03:34 -0800 Subject: [PATCH 058/321] f2fs: write SSA pages under memory pressure Under memory pressure, we don't need to skip SSA page writes. Change-Id: I3649b27fb9d69c3cf6fe07c9726ba9f57f446613 Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f2499ad1..d129a9a2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -179,7 +179,7 @@ static int f2fs_write_meta_page(struct page *page, if (unlikely(sbi->por_doing)) goto redirty_out; - if (wbc->for_reclaim) + if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; @@ -188,6 +188,9 @@ static int f2fs_write_meta_page(struct page *page, write_meta_page(sbi, page); dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, META, WRITE); return 0; redirty_out: From bde0d68b71ab8c3e151c7b6901c6c6b1d6b6856b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 18 Nov 2014 10:50:21 -0800 Subject: [PATCH 059/321] f2fs: call flush_dcache_page when the page was updated Whenever f2fs updates mapped pages, it needs to call flush_dcache_page. Change-Id: Ibcad2abf83b79ce13474041993b35744250fd277 Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 7 ++++++- fs/f2fs/inline.c | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 6759af7e..3fc115e9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -302,8 +302,10 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - if (!f2fs_has_inline_dentry(dir)) + if (!f2fs_has_inline_dentry(dir)) { kunmap(page); + flush_dcache_page(page); + } set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -381,6 +383,7 @@ static int make_empty_dir(struct inode *inode, do_make_empty_dir(inode, parent, &d); kunmap_atomic(dentry_blk); + flush_dcache_page(dentry_page); set_page_dirty(dentry_page); f2fs_put_page(dentry_page, 1); @@ -594,6 +597,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } kunmap(dentry_page); + flush_dcache_page(dentry_page); f2fs_put_page(dentry_page, 1); return err; } @@ -676,6 +680,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, NR_DENTRY_IN_BLOCK, 0); kunmap(page); /* kunmap - pair of f2fs_find_entry */ + flush_dcache_page(page); set_page_dirty(page); dir->i_ctime = dir->i_mtime = CURRENT_TIME; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8fe6d180..753f9d2d 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -45,8 +45,8 @@ void read_inline_data(struct page *page, struct page *ipage) src_addr = inline_data_addr(ipage); dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - flush_dcache_page(page); kunmap_atomic(dst_addr); + flush_dcache_page(page); SetPageUptodate(page); } @@ -107,6 +107,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(dst_addr); + flush_dcache_page(page); SetPageUptodate(page); no_update: /* write data page to try to make data consistent */ @@ -359,6 +360,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, NR_INLINE_DENTRY * F2FS_SLOT_LEN); kunmap_atomic(dentry_blk); + flush_dcache_page(page); SetPageUptodate(page); set_page_dirty(page); From 82bf615c98c7433f8cd71c65a4dba7567bd3b164 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Nov 2014 16:51:34 -0800 Subject: [PATCH 060/321] Revert "f2fs: call flush_dcache_page when the page was updated" Need to replace a new patch. This reverts commit 0239f8665c4e50daa224cbb8220ce18209264977. --- fs/f2fs/dir.c | 7 +------ fs/f2fs/inline.c | 4 +--- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3fc115e9..6759af7e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -302,10 +302,8 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - if (!f2fs_has_inline_dentry(dir)) { + if (!f2fs_has_inline_dentry(dir)) kunmap(page); - flush_dcache_page(page); - } set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -383,7 +381,6 @@ static int make_empty_dir(struct inode *inode, do_make_empty_dir(inode, parent, &d); kunmap_atomic(dentry_blk); - flush_dcache_page(dentry_page); set_page_dirty(dentry_page); f2fs_put_page(dentry_page, 1); @@ -597,7 +594,6 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } kunmap(dentry_page); - flush_dcache_page(dentry_page); f2fs_put_page(dentry_page, 1); return err; } @@ -680,7 +676,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, NR_DENTRY_IN_BLOCK, 0); kunmap(page); /* kunmap - pair of f2fs_find_entry */ - flush_dcache_page(page); set_page_dirty(page); dir->i_ctime = dir->i_mtime = CURRENT_TIME; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 753f9d2d..8fe6d180 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -45,8 +45,8 @@ void read_inline_data(struct page *page, struct page *ipage) src_addr = inline_data_addr(ipage); dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap_atomic(dst_addr); flush_dcache_page(page); + kunmap_atomic(dst_addr); SetPageUptodate(page); } @@ -107,7 +107,6 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(dst_addr); - flush_dcache_page(page); SetPageUptodate(page); no_update: /* write data page to try to make data consistent */ @@ -360,7 +359,6 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, NR_INLINE_DENTRY * F2FS_SLOT_LEN); kunmap_atomic(dentry_blk); - flush_dcache_page(page); SetPageUptodate(page); set_page_dirty(page); From 578bd5736a83e4323d36bb139cb65fd79d279644 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 18 Nov 2014 10:50:21 -0800 Subject: [PATCH 061/321] f2fs: call flush_dcache_page when the page was updated Whenever f2fs updates mapped pages, it needs to call flush_dcache_page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 7 ++++++- fs/f2fs/inline.c | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 6759af7e..cd0c4a8c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -302,8 +302,10 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - if (!f2fs_has_inline_dentry(dir)) + if (!f2fs_has_inline_dentry(dir)) { + flush_dcache_page(page); kunmap(page); + } set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -380,6 +382,7 @@ static int make_empty_dir(struct inode *inode, make_dentry_ptr(&d, (void *)dentry_blk, 1); do_make_empty_dir(inode, parent, &d); + flush_dcache_page(dentry_page); kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); @@ -593,6 +596,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, update_inode_page(dir); clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } + flush_dcache_page(dentry_page); kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; @@ -675,6 +679,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); + flush_dcache_page(page); kunmap(page); /* kunmap - pair of f2fs_find_entry */ set_page_dirty(page); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8fe6d180..fc867412 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -106,6 +106,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) src_addr = inline_data_addr(dn->inode_page); dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + flush_dcache_page(page); kunmap_atomic(dst_addr); SetPageUptodate(page); no_update: @@ -358,6 +359,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, memcpy(dentry_blk->filename, inline_dentry->filename, NR_INLINE_DENTRY * F2FS_SLOT_LEN); + flush_dcache_page(page); kunmap_atomic(dentry_blk); SetPageUptodate(page); set_page_dirty(page); From 1ebf87d07f51bc0c423947b2ac091fc7b0867cb5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Nov 2014 16:36:28 -0800 Subject: [PATCH 062/321] f2fs: introduce f2fs_dentry_kunmap to clean up This patch introduces f2fs_dentry_kunmap to clean up dirty codes. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/namei.c Change-Id: I2bf14f843a89cc401bf6dfceff16a3593f23a9cf --- fs/f2fs/dir.c | 8 ++------ fs/f2fs/f2fs.h | 10 ++++++++++ fs/f2fs/namei.c | 18 ++++++------------ fs/f2fs/recovery.c | 3 +-- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cd0c4a8c..9fd776a1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -286,8 +286,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) de = f2fs_find_entry(dir, qstr, &page, 0); if (de) { res = le32_to_cpu(de->ino); - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page, false); f2fs_put_page(page, 0); } @@ -302,10 +301,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - if (!f2fs_has_inline_dentry(dir)) { - flush_dcache_page(page); - kunmap(page); - } + f2fs_dentry_kunmap(dir, page, true); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a3afb57e..aef7055d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1228,6 +1228,16 @@ static inline void *inline_dentry_addr(struct page *page) return (void *)&(ri->i_addr[1]); } +static inline void f2fs_dentry_kunmap(struct inode *dir, + struct page *page, int modified) +{ + if (!f2fs_has_inline_dentry(dir)) { + if (modified) + flush_dcache_page(page); + kunmap(page); + } +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d9e3ff0a..dc16fab6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -195,8 +195,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); if (de) { nid_t ino = le32_to_cpu(de->ino); - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page, false); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); @@ -226,8 +225,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page, false); f2fs_put_page(page, 0); goto fail; } @@ -452,8 +450,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - if (!f2fs_has_inline_dentry(old_inode)) - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page, false); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -466,18 +463,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - if (!f2fs_has_inline_dentry(new_dir)) - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page, false); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - if (!f2fs_has_inline_dentry(old_inode)) - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page, false); f2fs_put_page(old_dir_page, 0); } out_old: - if (!f2fs_has_inline_dentry(old_dir)) - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page, false); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 3ab8d822..29109992 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -129,8 +129,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out; out_unmap_put: - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page, false); f2fs_put_page(page, 0); out_err: iput(dir); From 8174d42096c03ae338be92317a97291245e01d7e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Nov 2014 16:37:40 -0800 Subject: [PATCH 063/321] f2fs: fix livelock calling f2fs_iget during f2fs_evict_inode In f2fs_evict_inode, commit_inmemory_pages f2fs_gc f2fs_iget iget_locked -> wait for inode free Here, if the inode is same as the one to be evicted, f2fs should wait forever. Actually, we should not call f2fs_balance_fs during f2fs_evict_inode to avoid this. But, the commit_inmem_pages calls f2fs_balance_fs by default, even if f2fs_evict_inode wants to free inmemory pages only. Hence, this patch adds to trigger f2fs_balance_fs only when there is something to write. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7156ee98..2a6733cb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -289,7 +289,16 @@ void commit_inmem_pages(struct inode *inode, bool abort) .rw = WRITE_SYNC, }; - f2fs_balance_fs(sbi); + /* + * The abort is true only when f2fs_evict_inode is called. + * Basically, the f2fs_evict_inode doesn't produce any data writes, so + * that we don't need to call f2fs_balance_fs. + * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this + * inode becomes free by iget_locked in f2fs_iget. + */ + if (!abort) + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); mutex_lock(&fi->inmem_lock); From 18ea6661473a843fba4ac53b243a31efb069ddc2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 23 Nov 2014 22:44:40 -0800 Subject: [PATCH 064/321] f2fs: remove unnecessary flush_dcache_page This patch removed flush_dcache_pages for dentry pages, which are unnecessary. And, it revises the f2fs_dentry_kunmap for cleaning the codes. Change-Id: Ib632067db3dcb68fff70b87d36d6889a3e344117 Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 7 ++----- fs/f2fs/f2fs.h | 8 ++------ fs/f2fs/inline.c | 1 - fs/f2fs/namei.c | 12 ++++++------ fs/f2fs/recovery.c | 2 +- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9fd776a1..b317f533 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -286,7 +286,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) de = f2fs_find_entry(dir, qstr, &page, 0); if (de) { res = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page, false); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } @@ -301,7 +301,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - f2fs_dentry_kunmap(dir, page, true); + f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -378,7 +378,6 @@ static int make_empty_dir(struct inode *inode, make_dentry_ptr(&d, (void *)dentry_blk, 1); do_make_empty_dir(inode, parent, &d); - flush_dcache_page(dentry_page); kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); @@ -592,7 +591,6 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, update_inode_page(dir); clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - flush_dcache_page(dentry_page); kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; @@ -675,7 +673,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); - flush_dcache_page(page); kunmap(page); /* kunmap - pair of f2fs_find_entry */ set_page_dirty(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index aef7055d..b3fa53d6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1228,14 +1228,10 @@ static inline void *inline_dentry_addr(struct page *page) return (void *)&(ri->i_addr[1]); } -static inline void f2fs_dentry_kunmap(struct inode *dir, - struct page *page, int modified) +static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) { - if (!f2fs_has_inline_dentry(dir)) { - if (modified) - flush_dcache_page(page); + if (!f2fs_has_inline_dentry(dir)) kunmap(page); - } } static inline int f2fs_readonly(struct super_block *sb) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index fc867412..58174c71 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -359,7 +359,6 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, memcpy(dentry_blk->filename, inline_dentry->filename, NR_INLINE_DENTRY * F2FS_SLOT_LEN); - flush_dcache_page(page); kunmap_atomic(dentry_blk); SetPageUptodate(page); set_page_dirty(page); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index dc16fab6..71d0e696 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -195,7 +195,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); if (de) { nid_t ino = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page, false); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); @@ -225,7 +225,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - f2fs_dentry_kunmap(dir, page, false); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } @@ -450,7 +450,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - f2fs_dentry_kunmap(old_inode, old_dir_page, false); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -463,15 +463,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - f2fs_dentry_kunmap(new_dir, new_page, false); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - f2fs_dentry_kunmap(old_inode, old_dir_page, false); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_old: - f2fs_dentry_kunmap(old_dir, old_page, false); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 29109992..8180d8ef 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -129,7 +129,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out; out_unmap_put: - f2fs_dentry_kunmap(dir, page, false); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); out_err: iput(dir); From cf04283fa02ced1e44a0a24691470f3e3eba6f5b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 24 Nov 2014 15:52:00 +0100 Subject: [PATCH 065/321] f2fs: fix typos for the word "destroy" in jump labels Two jump labels were adjusted in the implementation of the create_node_manager_caches() function because these identifiers contained typos. Signed-off-by: Markus Elfring Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e45a7bff..3cd684db 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2084,17 +2084,17 @@ int __init create_node_manager_caches(void) free_nid_slab = f2fs_kmem_cache_create("free_nid", sizeof(struct free_nid)); if (!free_nid_slab) - goto destory_nat_entry; + goto destroy_nat_entry; nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) - goto destory_free_nid; + goto destroy_free_nid; return 0; -destory_free_nid: +destroy_free_nid: kmem_cache_destroy(free_nid_slab); -destory_nat_entry: +destroy_nat_entry: kmem_cache_destroy(nat_entry_slab); fail: return -ENOMEM; From e732b7450fcac3d2879419b9efa6a212445f3664 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Nov 2014 10:59:45 -0800 Subject: [PATCH 066/321] - f2fs: fix deadlock during inline_data conversion A deadlock can be occurred: Thread 1] Thread 2] - f2fs_write_data_pages - f2fs_write_begin - lock_page(page #0) - grab_cache_page(page #X) - get_node_page(inode_page) - grab_cache_page(page #0) : to convert inline_data - f2fs_write_data_page - f2fs_write_inline_data - get_node_page(inode_page) In this case, trying to lock inode_page and page #0 causes deadlock. In order to avoid this, this patch adds a rule for this locking policy, which is that page #0 should be locked followed by inode_page lock. Change-Id: I8a64e0a42f58e303968a27ed9eed6f338effc7db Signed-off-by: Jaegeuk Kim Reviewed-on: http://gerrit.mot.com/692615 Tested-by: Jira Key SLTApproved: Slta Waiver SLTApproved: Connie Zhao Reviewed-by: Yin-Jun Chen Submit-Approved: Yin-Jun Chen Tested-by: Yin-Jun Chen --- fs/f2fs/data.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9f0bf075..8faf65d5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -936,6 +936,17 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); f2fs_balance_fs(sbi); + + /* + * We should check this at this moment to avoid deadlock on inode page + * and #0 page. The locking rule for inline_data conversion should be: + * lock_page(page #0) -> lock_page(inode_page) + */ + if (index != 0) { + err = f2fs_convert_inline_inode(inode); + if (err) + goto fail; + } repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { @@ -960,21 +971,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); sync_inode_page(&dn); goto put_next; - } else if (page->index == 0) { - err = f2fs_convert_inline_page(&dn, page); - if (err) - goto put_fail; - } else { - struct page *p = grab_cache_page(inode->i_mapping, 0); - if (!p) { - err = -ENOMEM; - goto put_fail; - } - err = f2fs_convert_inline_page(&dn, p); - f2fs_put_page(p, 1); - if (err) - goto put_fail; } + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto put_fail; } err = f2fs_reserve_block(&dn, index); if (err) From 54106b146904b1b183f94d6c464ef087b760301f Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Tue, 25 Nov 2014 12:44:23 +0900 Subject: [PATCH 067/321] f2fs: check dirty_nat_cnt before flushing nat entries in journal It's meaningless to check dirty_nat_cnt after re-dirtying nat entries in journal. And although there are rooms for dirty nat entires if dirty_nat_cnt is zero, it's also meaningless to check __has_cursum_space. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3cd684db..ba089bb9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -171,7 +171,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry *ne) { - nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; + nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; head = radix_tree_lookup(&nm_i->nat_set_root, set); @@ -1946,6 +1946,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t set_idx = 0; LIST_HEAD(sets); + if (!nm_i->dirty_nat_cnt) + return; /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1954,9 +1956,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - if (!nm_i->dirty_nat_cnt) - return; - while ((found = __gang_lookup_nat_set(nm_i, set_idx, NATVEC_SIZE, setvec))) { unsigned idx; From e7c24d0d8794d0abfa4fcf7478ddf5f3b7ed6650 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Tue, 25 Nov 2014 12:44:24 +0900 Subject: [PATCH 068/321] f2fs: no more dirty_nat_entires when flushing After flushing dirty nat entries, it has to be no more dirty nat entries. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ba089bb9..0b97b566 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1926,10 +1926,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, else f2fs_put_page(page, 1); - if (!set->entry_cnt) { - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); - } + f2fs_bug_on(sbi, set->entry_cnt); + + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); } /* From 3593264d4c2c733e1c17d135abd14db2377fe508 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Nov 2014 11:34:02 -0800 Subject: [PATCH 069/321] f2fs: make clean the page before writing If a page is set to be written to the disk, we can make clean the page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 58174c71..c8339970 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -84,7 +84,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; - int err; + int dirty, err; f2fs_bug_on(F2FS_I_SB(dn->inode), page->index); @@ -110,12 +110,17 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) kunmap_atomic(dst_addr); SetPageUptodate(page); no_update: + /* clear dirty state */ + dirty = clear_page_dirty_for_io(page); + /* write data page to try to make data consistent */ set_page_writeback(page); write_data_page(page, dn, &new_blk_addr, &fio); update_extent_cache(new_blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); + if (dirty) + inode_dec_dirty_pages(dn->inode); /* clear inline data and flag after data writeback */ truncate_inline_data(dn->inode_page, 0); From 24320f3a613546452f97eae256d366ad7ade57c6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Nov 2014 17:27:38 -0800 Subject: [PATCH 070/321] f2fs: fix to recover converted inline_data If an inode has converted inline_data which was written to the disk, we should set its inode flag for further fsync so that this inline_data can be recovered from sudden power off. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c8339970..4674392e 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -122,6 +122,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (dirty) inode_dec_dirty_pages(dn->inode); + /* this converted inline_data should be recovered. */ + set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); + /* clear inline data and flag after data writeback */ truncate_inline_data(dn->inode_page, 0); clear_out: From 1490833b783c556fbf886e8e571bd2071e00a339 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Thu, 27 Nov 2014 16:03:08 +0900 Subject: [PATCH 071/321] f2fs: cleanup if-statement of phase in gc_data_segment Little cleanup to distinguish each phase easily Signed-off-by: Changman Lee [Jaegeuk Kim: modify indentation for code readability] Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ccf1bab1..9b112e84 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -603,27 +603,27 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, data_page = find_data_page(inode, start_bidx + ofs_in_node, false); - if (IS_ERR(data_page)) - goto next_iput; + if (IS_ERR(data_page)) { + iput(inode); + continue; + } f2fs_put_page(data_page, 0); add_gc_inode(inode, ilist); - } else { - inode = find_gc_inode(dni.ino, ilist); - if (inode) { - start_bidx = start_bidx_of_node(nofs, - F2FS_I(inode)); - data_page = get_lock_data_page(inode, + continue; + } + + /* phase 3 */ + inode = find_gc_inode(dni.ino, ilist); + if (inode) { + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + data_page = get_lock_data_page(inode, start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); - } + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); } - continue; -next_iput: - iput(inode); } if (++phase < 4) From 094d4ab6c7d0370bf8779f8ceab68267df89a3b8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 1 Dec 2014 11:30:20 +0800 Subject: [PATCH 072/321] f2fs: fix to return correct error number in f2fs_write_begin Fix the wrong error number in error path of f2fs_write_begin. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8faf65d5..881798d2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -960,8 +960,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, /* check inline_data */ ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); goto unlock_fail; + } set_new_dnode(&dn, inode, ipage, ipage, 0); From e0f43152c07dacd8259601c8b149e45f0f724117 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 1 Dec 2014 16:29:58 +0900 Subject: [PATCH 073/321] f2fs: cleanup redundant macro We've already made fi and sbi for inode. Let's avoid duplicated work. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3a56162c..7bb1542e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -200,10 +200,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = f2fs_sync_fs(inode->i_sb, 1); down_write(&fi->i_sem); - F2FS_I(inode)->xattr_ver = 0; + fi->xattr_ver = 0; if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { - F2FS_I(inode)->i_pino = pino; + fi->i_pino = pino; file_got_pino(inode); up_write(&fi->i_sem); mark_inode_dirty_sync(inode); @@ -235,7 +235,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) flush_out: remove_dirty_inode(sbi, ino, UPDATE_INO); clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(F2FS_I_SB(inode)); + ret = f2fs_issue_flush(sbi); } out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); From 897f14beba5cc2b1c50265a1f6ab1c83bc09edd8 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Fri, 28 Nov 2014 15:49:40 +0000 Subject: [PATCH 074/321] f2fs: more fast lookup for gc_inode list If there are many inodes that have data blocks in victim segment, it takes long time to find a inode in gc_inode list. Let's use radix_tree to reduce lookup time. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/gc.c Change-Id: Ic8bc17a2f6b4cf92e6f315a6b0ff515d511c3b46 --- fs/f2fs/gc.c | 50 +++++++++++++++++++++++++++++--------------------- fs/f2fs/gc.h | 5 +++++ 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9b112e84..4697a5ba 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -338,34 +338,42 @@ static const struct victim_selection default_v_ops = { .get_victim = get_victim_by_default, }; -static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) { struct inode_entry *ie; - list_for_each_entry(ie, ilist, list) - if (ie->inode->i_ino == ino) - return ie->inode; + ie = radix_tree_lookup(&gc_list->iroot, ino); + if (ie) + return ie->inode; return NULL; } -static void add_gc_inode(struct inode *inode, struct list_head *ilist) +static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) { struct inode_entry *new_ie; + int ret; - if (inode == find_gc_inode(inode->i_ino, ilist)) { + if (inode == find_gc_inode(gc_list, inode->i_ino)) { iput(inode); return; } - +retry: new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - list_add_tail(&new_ie->list, ilist); + + ret = radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); + if (ret) { + kmem_cache_free(winode_slab, new_ie); + goto retry; + } + list_add_tail(&new_ie->list, &gc_list->ilist); } -static void put_gc_inode(struct list_head *ilist) +static void put_gc_inode(struct gc_inode_list *gc_list) { struct inode_entry *ie, *next_ie; - list_for_each_entry_safe(ie, next_ie, ilist, list) { + list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) { + radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); kmem_cache_free(winode_slab, ie); @@ -551,7 +559,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) * the victim data block is ignored. */ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct list_head *ilist, unsigned int segno, int gc_type) + struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; struct f2fs_summary *entry; @@ -609,12 +617,12 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } f2fs_put_page(data_page, 0); - add_gc_inode(inode, ilist); + add_gc_inode(gc_list, inode); continue; } /* phase 3 */ - inode = find_gc_inode(dni.ino, ilist); + inode = find_gc_inode(gc_list, dni.ino); if (inode) { start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); data_page = get_lock_data_page(inode, @@ -657,7 +665,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, } static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, - struct list_head *ilist, int gc_type) + struct gc_inode_list *gc_list, int gc_type) { struct page *sum_page; struct f2fs_summary_block *sum; @@ -675,7 +683,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, gc_node_segment(sbi, sum->entries, segno, gc_type); break; case SUM_TYPE_DATA: - gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); break; } blk_finish_plug(&plug); @@ -688,16 +696,16 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, int f2fs_gc(struct f2fs_sb_info *sbi) { - struct list_head ilist; unsigned int segno, i; int gc_type = BG_GC; int nfree = 0; int ret = -1; - struct cp_control cpc = { - .reason = CP_SYNC, + struct cp_control cpc; + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(GFP_ATOMIC), }; - INIT_LIST_HEAD(&ilist); gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; @@ -719,7 +727,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) META_SSA); for (i = 0; i < sbi->segs_per_sec; i++) - do_garbage_collect(sbi, segno + i, &ilist, gc_type); + do_garbage_collect(sbi, segno + i, &gc_list, gc_type); if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -735,7 +743,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) stop: mutex_unlock(&sbi->gc_mutex); - put_gc_inode(&ilist); + put_gc_inode(&gc_list); return ret; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 16f0b2b2..6ff7ad38 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -40,6 +40,11 @@ struct inode_entry { struct inode *inode; }; +struct gc_inode_list { + struct list_head ilist; + struct radix_tree_root iroot; +}; + /* * inline functions */ From 446921effde239ab8d3ac7bd3ba708c78c6afa2b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Dec 2014 16:40:28 -0800 Subject: [PATCH 075/321] f2fs: fix missing kmem_cache_free This patch fixes missing kmem_cache_free when handling errors. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0b97b566..b7f48727 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -158,7 +158,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { - cond_resched(); + kmem_cache_free(nat_entry_set_slab, head); goto retry; } } From 6b7afe9b36b13f6a3aec32e595c1b5dbf200021f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Dec 2014 21:15:10 -0800 Subject: [PATCH 076/321] f2fs: use rw_semaphore for nat entry lock Previoulsy, we used rwlock for nat_entry lock. But, now we have a lot of complex operations in set_node_addr. (e.g., allocating kernel memories, handling radix_trees, and so on) So, this patches tries to change spinlock to rw_semaphore to give CPUs to other threads. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 52 +++++++++++++++++++++++++------------------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b3fa53d6..e1f926d6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -331,7 +331,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b7f48727..972dbdfa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -196,11 +196,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -210,11 +210,11 @@ bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool fsynced = false; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) fsynced = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return fsynced; } @@ -224,13 +224,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need_update; } @@ -258,17 +258,17 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, { struct nat_entry *e; retry: - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); if (!e) { - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); goto retry; } node_info_from_raw_nat(&e->ni, ne); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -277,12 +277,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; retry: - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); if (!e) { - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); goto retry; } e->ni = *ni; @@ -326,7 +326,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -336,7 +336,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (available_free_memory(sbi, NAT_ENTRIES)) return 0; - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; ne = list_first_entry(&nm_i->nat_entries, @@ -344,7 +344,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); return nr_shrink; } @@ -367,14 +367,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); } - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (e) return; @@ -1433,13 +1433,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (build) { /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1828,20 +1828,20 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) raw_ne = nat_in_journal(sum, i); retry: - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne) goto found; ne = grab_nat_entry(nm_i, nid); if (!ne) { - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); goto retry; } node_info_from_raw_nat(&ne->ni, &raw_ne); found: __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1912,10 +1912,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); - write_lock(&NM_I(sbi)->nat_tree_lock); + down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - write_unlock(&NM_I(sbi)->nat_tree_lock); + up_write(&NM_I(sbi)->nat_tree_lock); if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); @@ -2001,7 +2001,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - rwlock_init(&nm_i->nat_tree_lock); + init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2057,7 +2057,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2066,7 +2066,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; From d53de8a54eb9120f6591f056d4a6af954f2820a7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Dec 2014 20:47:26 -0800 Subject: [PATCH 077/321] f2fs: call radix_tree_preload before radix_tree_insert This patch tries to fix: BUG: using smp_processor_id() in preemptible [00000000] code: f2fs_gc-254:0/384 (radix_tree_node_alloc+0x14/0x74) from [] (radix_tree_insert+0x110/0x200) (radix_tree_insert+0x110/0x200) from [] (gc_data_segment+0x340/0x52c) (gc_data_segment+0x340/0x52c) from [] (f2fs_gc+0x208/0x400) (f2fs_gc+0x208/0x400) from [] (gc_thread_func+0x248/0x28c) (gc_thread_func+0x248/0x28c) from [] (kthread+0xa0/0xac) (kthread+0xa0/0xac) from [] (ret_from_fork+0x14/0x3c) The reason is that f2fs calls radix_tree_insert under enabled preemption. So, before calling it, we need to call radix_tree_preload. Otherwise, we should use _GFP_WAIT for the radix tree, and use mutex or semaphore to cover the radix tree operations. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++++ fs/f2fs/gc.c | 6 ++---- fs/f2fs/node.c | 11 +++++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d129a9a2..0759469f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -305,6 +305,11 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) struct inode_management *im = &sbi->im[type]; struct ino_entry *e; retry: + if (radix_tree_preload(GFP_NOFS)) { + cond_resched(); + goto retry; + } + spin_lock(&im->ino_lock); e = radix_tree_lookup(&im->ino_root, ino); @@ -312,11 +317,13 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); if (!e) { spin_unlock(&im->ino_lock); + radix_tree_preload_end(); goto retry; } if (radix_tree_insert(&im->ino_root, ino, e)) { spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); + radix_tree_preload_end(); goto retry; } memset(e, 0, sizeof(struct ino_entry)); @@ -327,6 +334,7 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) im->ino_num++; } spin_unlock(&im->ino_lock); + radix_tree_preload_end(); } static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4697a5ba..ca81b735 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -351,7 +351,6 @@ static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) { struct inode_entry *new_ie; - int ret; if (inode == find_gc_inode(gc_list, inode->i_ino)) { iput(inode); @@ -361,8 +360,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - ret = radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); - if (ret) { + if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { kmem_cache_free(winode_slab, new_ie); goto retry; } @@ -703,7 +701,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) struct cp_control cpc; struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), - .iroot = RADIX_TREE_INIT(GFP_ATOMIC), + .iroot = RADIX_TREE_INIT(GFP_NOFS), }; gc_more: diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 972dbdfa..023fc33f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1448,15 +1448,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i->nid = nid; i->state = NID_NEW; + if (radix_tree_preload(GFP_NOFS)) { + kmem_cache_free(free_nid_slab, i); + return 0; + } + spin_lock(&nm_i->free_nid_list_lock); if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); kmem_cache_free(free_nid_slab, i); return 0; } list_add_tail(&i->list, &nm_i->free_nid_list); nm_i->fcnt++; spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); return 1; } @@ -1995,8 +2002,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); - INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); - INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); + INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); mutex_init(&nm_i->build_lock); From 39a2caee2c0b7a441aaa894e83598e51697e7423 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 10:39:49 -0800 Subject: [PATCH 078/321] f2fs: do retry operations with cond_resched This patch revists retrial paths in f2fs. The basic idea is to use cond_resched instead of retrying from the very early stage. Suggested-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 +++++++ fs/f2fs/gc.c | 5 ++--- fs/f2fs/node.c | 41 +++++++++-------------------------------- fs/f2fs/segment.c | 5 ++--- 4 files changed, 20 insertions(+), 38 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e1f926d6..bd0ac5b6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1020,6 +1020,13 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } +static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + while (radix_tree_insert(root, index, item)) + cond_resched(); +} + #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) static inline bool IS_INODE(struct page *page) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ca81b735..d9adaa92 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -356,12 +356,11 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) iput(inode); return; } -retry: new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - +retry: if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { - kmem_cache_free(winode_slab, new_ie); + cond_resched(); goto retry; } list_add_tail(&new_ie->list, &gc_list->ilist); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 023fc33f..c8676f6f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -147,7 +147,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) return; -retry: + head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); @@ -156,11 +156,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, INIT_LIST_HEAD(&head->set_list); head->set = set; head->entry_cnt = 0; - - if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { - kmem_cache_free(nat_entry_set_slab, head); - goto retry; - } + f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } list_move_tail(&ne->list, &head->entry_list); nm_i->dirty_nat_cnt++; @@ -238,13 +234,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; - new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); - if (!new) - return NULL; - if (radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); nat_reset_flag(new); @@ -257,15 +248,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, struct f2fs_nat_entry *ne) { struct nat_entry *e; -retry: + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); - if (!e) { - up_write(&nm_i->nat_tree_lock); - goto retry; - } node_info_from_raw_nat(&e->ni, ne); } up_write(&nm_i->nat_tree_lock); @@ -276,15 +263,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; -retry: + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); - if (!e) { - up_write(&nm_i->nat_tree_lock); - goto retry; - } e->ni = *ni; f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -1834,19 +1817,13 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); raw_ne = nat_in_journal(sum, i); -retry: + down_write(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); - if (ne) - goto found; - - ne = grab_nat_entry(nm_i, nid); if (!ne) { - up_write(&nm_i->nat_tree_lock); - goto retry; + ne = grab_nat_entry(nm_i, nid); + node_info_from_raw_nat(&ne->ni, &raw_ne); } - node_info_from_raw_nat(&ne->ni, &raw_ne); -found: __set_nat_cache_dirty(nm_i, ne); up_write(&nm_i->nat_tree_lock); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2a6733cb..27bcf13f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -238,13 +238,13 @@ void register_inmem_page(struct inode *inode, struct page *page) struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; int err; -retry: + new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ new->page = page; INIT_LIST_HEAD(&new->list); - +retry: /* increase reference count with clean state */ mutex_lock(&fi->inmem_lock); err = radix_tree_insert(&fi->inmem_root, page->index, new); @@ -254,7 +254,6 @@ void register_inmem_page(struct inode *inode, struct page *page) return; } else if (err) { mutex_unlock(&fi->inmem_lock); - kmem_cache_free(inmem_entry_slab, new); goto retry; } get_page(page); From 3dcde17ae52b76e5434a75f3a4a0179779d55734 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 10:51:50 -0800 Subject: [PATCH 079/321] f2fs: count inline_xx in do_read_inode In do_read_inode, if we failed __recover_inline_status, the inode has inline flag without increasing its count. Later, f2fs_evict_inode will decrease the count, which causes -1. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cad068d3..26e8c1b5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -147,6 +147,10 @@ static int do_read_inode(struct inode *inode) __get_inode_rdev(inode, ri); f2fs_put_page(node_page, 1); + + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + return err; } @@ -198,8 +202,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) goto bad_inode; } unlock_new_inode(inode); - stat_inc_inline_inode(inode); - stat_inc_inline_dir(inode); trace_f2fs_iget(inode); return inode; From db882b0aea456b252712a110fb3f322034080b6a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 11:58:02 -0800 Subject: [PATCH 080/321] f2fs: set page private for inmemory pages for truncation The inmemory pages should be handled by invalidate_page since it needs to be released int the truncation path. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 27bcf13f..5bb214d3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -239,6 +239,8 @@ void register_inmem_page(struct inode *inode, struct page *page) struct inmem_pages *new; int err; + SetPagePrivate(page); + new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ From b9904a17bbfbe37d32c8a063cebb9e3467121813 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 14:37:37 -0800 Subject: [PATCH 081/321] f2fs: release inmemory pages when the file was closed If file is closed, let's drop inmemory pages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7bb1542e..f446c645 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -913,6 +913,14 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) return f2fs_convert_inline_inode(inode); } +static int f2fs_release_file(struct inode *inode, struct file *filp) +{ + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + commit_inmem_pages(inode, true); + return 0; +} + static int f2fs_ioc_commit_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); @@ -1022,6 +1030,7 @@ const struct file_operations f2fs_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .open = generic_file_open, + .release = f2fs_release_file, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, From 629a6ddc99643db911c5d7d9f39469c32ec4f7cb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 17:18:15 -0800 Subject: [PATCH 082/321] f2fs: count the number of inmemory pages This patch adds counting # of inmemory pages in the page cache. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 +++ fs/f2fs/f2fs.h | 3 ++- fs/f2fs/segment.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 40b679ce..4e2e39c0 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,6 +39,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -249,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - inmem: %4d\n", + si->inmem_pages); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bd0ac5b6..41f6bc78 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -461,6 +461,7 @@ enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_NODES, F2FS_DIRTY_META, + F2FS_INMEM_PAGES, NR_COUNT_TYPE, }; @@ -1497,7 +1498,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, sits, fnids; int total_count, utilization; - int bg_gc, inline_inode, inline_dir; + int bg_gc, inline_inode, inline_dir, inmem_pages; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5bb214d3..443372e5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -260,6 +260,7 @@ void register_inmem_page(struct inode *inode, struct page *page) } get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); } @@ -275,6 +276,7 @@ void invalidate_inmem_page(struct inode *inode, struct page *page) f2fs_put_page(cur->page, 0); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } mutex_unlock(&fi->inmem_lock); } @@ -316,6 +318,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } if (submit_bio) f2fs_submit_merged_bio(sbi, DATA, WRITE); From 161b78b85954065ce2b34142ad3f4a699cdcbc41 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 8 Dec 2014 15:29:40 +0900 Subject: [PATCH 083/321] f2fs: check if inode state is dirty at fsync If inode state is dirty, go straight to write. Suggested-by: Jaegeuk Kim Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f446c645..4082c07e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -132,6 +132,17 @@ static inline bool need_do_checkpoint(struct inode *inode) return need_cp; } +static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + bool ret = false; + /* But we need to avoid that there are some inode updates */ + if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + ret = true; + f2fs_put_page(i, 0); + return ret; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -162,19 +173,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return ret; } + /* if the inode is dirty, let's recover all the time */ + if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { + update_inode_page(inode); + goto go_write; + } + /* * if there is no written data, don't waste time to write recovery info. */ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { - struct page *i = find_get_page(NODE_MAPPING(sbi), ino); - /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { - f2fs_put_page(i, 0); + /* it may call write_inode just prior to fsync */ + if (need_inode_page_update(sbi, ino)) goto go_write; - } - f2fs_put_page(i, 0); if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) From b68a76445b1525c48e77d1ca9a30ff545d98fc42 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 8 Dec 2014 15:29:41 +0900 Subject: [PATCH 084/321] f2fs: cleanup path to need cp at fsync Added some commentaries for code readability and cleaned up if-statement clearly. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 79 +++++++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4082c07e..edd73881 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -143,6 +143,26 @@ static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) return ret; } +static void try_to_fix_pino(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + nid_t pino; + + down_write(&fi->i_sem); + fi->xattr_ver = 0; + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + fi->i_pino = pino; + file_got_pino(inode); + up_write(&fi->i_sem); + + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + } else { + up_write(&fi->i_sem); + } +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -207,49 +227,36 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) up_read(&fi->i_sem); if (need_cp) { - nid_t pino; - /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); - down_write(&fi->i_sem); - fi->xattr_ver = 0; - if (file_wrong_pino(inode) && inode->i_nlink == 1 && - get_parent_ino(inode, &pino)) { - fi->i_pino = pino; - file_got_pino(inode); - up_write(&fi->i_sem); - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - } else { - up_write(&fi->i_sem); - } - } else { + /* + * We've secured consistency through sync_fs. Following pino + * will be used only for fsynced inodes after checkpoint. + */ + try_to_fix_pino(inode); + goto out; + } sync_nodes: - sync_node_pages(sbi, ino, &wbc); - - if (need_inode_block_update(sbi, ino)) { - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - goto sync_nodes; - } + sync_node_pages(sbi, ino, &wbc); - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + if (need_inode_block_update(sbi, ino)) { + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + goto sync_nodes; + } - /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, ino, APPEND_INO); - clear_inode_flag(fi, FI_APPEND_WRITE); + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; + + /* once recovery info is written, don't need to tack this */ + remove_dirty_inode(sbi, ino, APPEND_INO); + clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, ino, UPDATE_INO); - clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(sbi); - } + remove_dirty_inode(sbi, ino, UPDATE_INO); + clear_inode_flag(fi, FI_UPDATE_WRITE); + ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; From bb30047a8e3db54c519ccd8dfacc8c936ac819e2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Dec 2014 19:08:20 +0800 Subject: [PATCH 085/321] f2fs: use atomic for counting inode with inline_{dir,inode} flag As inline_{dir,inode} stat is increased/decreased concurrently by multi threads, so the value is not so accurate, let's use atomic type for counting accurately. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++++-- fs/f2fs/f2fs.h | 12 ++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 4e2e39c0..91e8f699 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -46,8 +46,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->valid_count = valid_user_blocks(sbi); si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = valid_inode_count(sbi); - si->inline_inode = sbi->inline_inode; - si->inline_dir = sbi->inline_dir; + si->inline_inode = atomic_read(&sbi->inline_inode); + si->inline_dir = atomic_read(&sbi->inline_dir); si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -329,6 +329,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; + atomic_set(&sbi->inline_inode, 0); + atomic_set(&sbi->inline_dir, 0); + mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); mutex_unlock(&f2fs_stat_mutex); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 41f6bc78..fedc534e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -589,8 +589,8 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ - int inline_inode; /* # of inline_data inodes */ - int inline_dir; /* # of inline_dentry inodes */ + atomic_t inline_inode; /* # of inline_data inodes */ + atomic_t inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -1531,22 +1531,22 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode++); \ + (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \ } while (0) #define stat_dec_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode--); \ + (atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \ } while (0) #define stat_inc_inline_dir(inode) \ do { \ if (f2fs_has_inline_dentry(inode)) \ - ((F2FS_I_SB(inode))->inline_dir++); \ + (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) #define stat_dec_inline_dir(inode) \ do { \ if (f2fs_has_inline_dentry(inode)) \ - ((F2FS_I_SB(inode))->inline_dir--); \ + (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) From 31dd9e217b8a87c6dd3209cc5c8a95fd4374c7e2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Dec 2014 14:56:19 +0800 Subject: [PATCH 086/321] f2fs: fix to enable readahead for SSA/CP blocks 1.We use zero as upper boundary value for ra SSA/CP blocks, we will skip readahead as verification failure with max number, it causes low performance. 2.Low boundary value is not accurate for SSA/CP/POR region verification, so these values need to be redefined. This patch fixes above issues. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0759469f..f0c2c03e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -96,8 +96,9 @@ static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) case META_SIT: return SIT_BLK_CNT(sbi); case META_SSA: + return MAIN_BLKADDR(sbi); case META_CP: - return 0; + return SM_I(sbi)->sit_info->sit_base_addr; case META_POR: return MAX_BLKADDR(sbi); default: @@ -142,11 +143,23 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type prev_blk_addr = blk_addr; break; case META_SSA: + if (unlikely(blkno >= max_blks)) + goto out; + if (unlikely(blkno < SM_I(sbi)->ssa_blkaddr)) + goto out; + blk_addr = blkno; + break; case META_CP: + if (unlikely(blkno >= max_blks)) + goto out; + if (unlikely(blkno < __start_cp_addr(sbi))) + goto out; + blk_addr = blkno; + break; case META_POR: if (unlikely(blkno >= max_blks)) goto out; - if (unlikely(blkno < SEG0_BLKADDR(sbi))) + if (unlikely(blkno < MAIN_BLKADDR(sbi))) goto out; blk_addr = blkno; break; From b12a563850b5c7508453aa76162d3500d5472339 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Dec 2014 14:59:17 +0800 Subject: [PATCH 087/321] f2fs: introduce is_valid_blkaddr to cleanup codes in ra_meta_pages This patch does cleanup work, it introduces is_valid_blkaddr() to include verification code for blkaddr with upper and down boundary value which were in ra_meta_pages previous. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 53 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f0c2c03e..129eca32 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -88,22 +88,36 @@ struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) return get_meta_page(sbi, index); } -static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: - return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK; + break; case META_SIT: - return SIT_BLK_CNT(sbi); + if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) + return false; + break; case META_SSA: - return MAIN_BLKADDR(sbi); + if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || + blkaddr < SM_I(sbi)->ssa_blkaddr)) + return false; + break; case META_CP: - return SM_I(sbi)->sit_info->sit_base_addr; + if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || + blkaddr < __start_cp_addr(sbi))) + return false; + break; case META_POR: - return MAX_BLKADDR(sbi); + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } + + return true; } /* @@ -114,7 +128,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - block_t max_blks = get_max_meta_blks(sbi, type); struct f2fs_io_info fio = { .type = META, @@ -124,18 +137,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type for (; nrpages-- > 0; blkno++) { block_t blk_addr; + if (!is_valid_blkaddr(sbi, blkno, type)) + goto out; + switch (type) { case META_NAT: - /* get nat block addr */ - if (unlikely(blkno >= max_blks)) + if (unlikely(blkno >= + NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; + /* get nat block addr */ blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - if (unlikely(blkno >= max_blks)) - goto out; blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); if (blkno != start && prev_blk_addr + 1 != blk_addr) @@ -143,24 +158,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type prev_blk_addr = blk_addr; break; case META_SSA: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < SM_I(sbi)->ssa_blkaddr)) - goto out; - blk_addr = blkno; - break; case META_CP: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < __start_cp_addr(sbi))) - goto out; - blk_addr = blkno; - break; case META_POR: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < MAIN_BLKADDR(sbi))) - goto out; blk_addr = blkno; break; default: From 98fbe8048f132097238d87161e51f1070a8e2cc0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Dec 2014 15:02:52 +0800 Subject: [PATCH 088/321] f2fs: avoid to ra unneeded blocks in recover flow To improve recovery speed, f2fs try to readahead many contiguous blocks in warm node segment, but for most time, abnormal power-off do not occur frequently, so when mount a normal power-off f2fs image, by contrary ra so many blocks and then invalid them will hurt the performance of mount. It's better to just ra the first next-block for normal condition. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 29 ++++++++++++++--------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/recovery.c | 10 ++++++++-- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 129eca32..f0f2578f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -73,21 +73,6 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) return page; } -struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) -{ - bool readahead = false; - struct page *page; - - page = find_get_page(META_MAPPING(sbi), index); - if (!page || (page && !PageUptodate(page))) - readahead = true; - f2fs_put_page(page, 0); - - if (readahead) - ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); - return get_meta_page(sbi, index); -} - static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { @@ -182,6 +167,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type return blkno - start; } +void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + bool readahead = false; + + page = find_get_page(META_MAPPING(sbi), index); + if (!page || (page && !PageUptodate(page))) + readahead = true; + f2fs_put_page(page, 0); + + if (readahead) + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); +} + static int f2fs_write_meta_page(struct page *page, struct writeback_control *wbc) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fedc534e..88221043 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1430,8 +1430,8 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); +void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 8180d8ef..34be4d76 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -170,13 +170,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + ra_meta_pages(sbi, blkaddr, 1, META_POR); + while (1) { struct fsync_inode_entry *entry; if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) return 0; - page = get_meta_page_ra(sbi, blkaddr); + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) break; @@ -227,6 +229,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + ra_meta_pages_cond(sbi, blkaddr); } f2fs_put_page(page, 1); return err; @@ -436,7 +440,9 @@ static int recover_data(struct f2fs_sb_info *sbi, if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) break; - page = get_meta_page_ra(sbi, blkaddr); + ra_meta_pages_cond(sbi, blkaddr); + + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) { f2fs_put_page(page, 1); From 02884935cc2aa6f17ec115ab2e3fbb28de75a4ce Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 Dec 2014 15:18:34 -0800 Subject: [PATCH 089/321] f2fs: remove checking dirty_exceed We don't need to force to write dirty_exceeded for f2fs_balance_fs_bg. This flag was only meaningful to write bypassing conditions. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c8676f6f..ad29bb5a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -57,8 +57,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) } else if (type == INO_ENTRIES) { int i; - if (sbi->sb->s_bdi->dirty_exceeded) - return false; for (i = 0; i <= UPDATE_INO; i++) mem_size += (sbi->im[i].ino_num * sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; From b29faf0cf01872983234d96bd969b2bae8783226 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 Dec 2014 15:20:48 -0800 Subject: [PATCH 090/321] f2fs: fix wrong condition check to trigger f2fs_sync_fs If there is not enough available memory, we need to trigger f2fs_sync_fs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 443372e5..08acdf56 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -349,7 +349,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || excess_prefree_segs(sbi) || - available_free_memory(sbi, INO_ENTRIES)) + !available_free_memory(sbi, INO_ENTRIES)) f2fs_sync_fs(sbi->sb, true); } From e469a1d2d8718c3f2fd6a169acfd49598714acfe Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 Dec 2014 13:59:33 -0800 Subject: [PATCH 091/321] f2fs: don't need to call lock_op and lock_page for abort We don't need to call lock_op and lock_page at the aborting path. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 08acdf56..cfda2e31 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -299,33 +299,38 @@ void commit_inmem_pages(struct inode *inode, bool abort) * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this * inode becomes free by iget_locked in f2fs_iget. */ - if (!abort) + if (!abort) { f2fs_balance_fs(sbi); - - f2fs_lock_op(sbi); + f2fs_lock_op(sbi); + } mutex_lock(&fi->inmem_lock); list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { - lock_page(cur->page); - if (!abort && cur->page->mapping == inode->i_mapping) { - f2fs_wait_on_page_writeback(cur->page, DATA); - if (clear_page_dirty_for_io(cur->page)) - inode_dec_dirty_pages(inode); - do_write_data_page(cur->page, &fio); - submit_bio = true; + if (!abort) { + lock_page(cur->page); + if (cur->page->mapping == inode->i_mapping) { + f2fs_wait_on_page_writeback(cur->page, DATA); + if (clear_page_dirty_for_io(cur->page)) + inode_dec_dirty_pages(inode); + do_write_data_page(cur->page, &fio); + submit_bio = true; + } + f2fs_put_page(cur->page, 1); + } else { + put_page(cur->page); } radix_tree_delete(&fi->inmem_root, cur->page->index); - f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } - if (submit_bio) - f2fs_submit_merged_bio(sbi, DATA, WRITE); mutex_unlock(&fi->inmem_lock); - filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); - f2fs_unlock_op(sbi); + if (!abort) { + f2fs_unlock_op(sbi); + if (submit_bio) + f2fs_submit_merged_bio(sbi, DATA, WRITE); + } } /* From 2052cd4c783bb9d0f8da4c30fcbaf192db1194ef Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 9 Dec 2014 06:08:59 -0800 Subject: [PATCH 092/321] - f2fs: change atomic and volatile write policies This patch adds two new ioctls to release inmemory pages grabbed by atomic writes. o f2fs_ioc_abort_volatile_write - If transaction was failed, all the grabbed pages and data should be written. o f2fs_ioc_release_volatile_write - This is to enhance the performance of PERSIST mode in sqlite. In order to avoid huge memory consumption which causes OOM, this patch changes volatile writes to use normal dirty pages, instead blocked flushing to the disk as long as system does not suffer from memory pressure. Change-Id: Ib92148c00fae08278a9032fde6608e1c405258e2 Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 9 ++++-- fs/f2fs/f2fs.h | 8 +++++ fs/f2fs/file.c | 77 +++++++++++++++++++++++++++++++++++++++++------ fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 3 ++ fs/f2fs/node.h | 1 + fs/f2fs/segment.c | 2 +- 7 files changed, 88 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 881798d2..d4883e88 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -814,6 +814,11 @@ static int f2fs_write_data_page(struct page *page, write: if (unlikely(sbi->por_doing)) goto redirty_out; + if (f2fs_is_drop_cache(inode)) + goto out; + if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && + available_free_memory(sbi, BASE_CHECK)) + goto redirty_out; /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { @@ -1111,7 +1116,7 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) if (offset % PAGE_CACHE_SIZE) return; - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + if (f2fs_is_atomic_file(inode)) invalidate_inmem_page(inode, page); if (PageDirty(page)) @@ -1134,7 +1139,7 @@ static int f2fs_set_data_page_dirty(struct page *page) SetPageUptodate(page); - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { + if (f2fs_is_atomic_file(inode)) { register_inmem_page(inode, page); return 1; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 88221043..1d7220d1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -200,6 +200,8 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -1112,6 +1114,7 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ }; @@ -1219,6 +1222,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); } +static inline bool f2fs_is_drop_cache(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); +} + static inline void *inline_data_addr(struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index edd73881..93a92b33 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -848,6 +848,19 @@ static long f2fs_fallocate(struct file *file, int mode, return ret; } +static int f2fs_release_file(struct inode *inode, struct file *filp) +{ + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode)) + commit_inmem_pages(inode, true); + if (f2fs_is_volatile_file(inode)) { + set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + filemap_fdatawrite(inode->i_mapping); + clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + } + return 0; +} + #define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) #define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) @@ -921,26 +934,20 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (!inode_owner_or_capable(inode)) return -EACCES; - f2fs_balance_fs(sbi); + f2fs_balance_fs(F2FS_I_SB(inode)); + + if (f2fs_is_atomic_file(inode)) + return 0; set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); return f2fs_convert_inline_inode(inode); } -static int f2fs_release_file(struct inode *inode, struct file *filp) -{ - /* some remained atomic pages should discarded */ - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) - commit_inmem_pages(inode, true); - return 0; -} - static int f2fs_ioc_commit_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); @@ -961,6 +968,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); mnt_drop_write_file(filp); + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); return ret; } @@ -971,11 +979,56 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (f2fs_is_volatile_file(inode)) + return 0; + set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); return f2fs_convert_inline_inode(inode); } +static int f2fs_ioc_release_volatile_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (!f2fs_is_volatile_file(inode)) + return 0; + + punch_hole(inode, 0, F2FS_BLKSIZE); + return 0; +} + +static int f2fs_ioc_abort_volatile_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + int ret; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + f2fs_balance_fs(F2FS_I_SB(inode)); + + if (f2fs_is_atomic_file(inode)) { + commit_inmem_pages(inode, false); + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + } + + if (f2fs_is_volatile_file(inode)) { + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + filemap_fdatawrite(inode->i_mapping); + set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + } + mnt_drop_write_file(filp); + return ret; +} + static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1019,6 +1072,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_commit_atomic_write(filp); case F2FS_IOC_START_VOLATILE_WRITE: return f2fs_ioc_start_volatile_write(filp); + case F2FS_IOC_RELEASE_VOLATILE_WRITE: + return f2fs_ioc_release_volatile_write(filp); + case F2FS_IOC_ABORT_VOLATILE_WRITE: + return f2fs_ioc_abort_volatile_write(filp); case FITRIM: return f2fs_ioc_fitrim(filp, arg); default: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 26e8c1b5..eac64091 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -303,7 +303,7 @@ void f2fs_evict_inode(struct inode *inode) nid_t xnid = F2FS_I(inode)->i_xattr_nid; /* some remained atomic pages should discarded */ - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + if (f2fs_is_atomic_file(inode)) commit_inmem_pages(inode, true); trace_f2fs_evict_inode(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ad29bb5a..c9a23d97 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -61,6 +61,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size += (sbi->im[i].ino_num * sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else { + if (sbi->sb->s_bdi->dirty_exceeded) + return false; } return res; } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index d10b6448..036b8871 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -108,6 +108,7 @@ enum mem_type { NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ + BASE_CHECK, /* check kernel status */ }; struct nat_entry_set { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cfda2e31..b0b35290 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -289,7 +289,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) bool submit_bio = false; struct f2fs_io_info fio = { .type = DATA, - .rw = WRITE_SYNC, + .rw = WRITE_SYNC | REQ_PRIO, }; /* From ff823b8b6eb018196b21dc29cb6166b085f9c422 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Fri, 21 Nov 2014 15:42:07 +0900 Subject: [PATCH 093/321] f2fs: fix wrong data structure when create slab It used nat_entry_set when create slab for sit_entry_set. Signed-off-by: Changman Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b0b35290..4a7c0a9d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2308,7 +2308,7 @@ int __init create_segment_manager_caches(void) goto fail; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", - sizeof(struct nat_entry_set)); + sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destory_discard_entry; From e3d48c65cb2ad0a92ea28321769bdc24daff2ccf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 14 Dec 2014 08:07:38 -0800 Subject: [PATCH 094/321] - f2fs: fix missing cp.reason is CP_SYNC The cp.reason should be assigned. Otherwise, if cp.reason has obsolete data like CP_DISCARD, it causes kernel panic like: Unable to handle kernel paging request at virtual address 32d1b444 pgd = da044000 [32d1b444] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: wlan(O) touchx [last unloaded: wlan] CPU: 2 Tainted: G O (3.4.42-g97affd0-00015-g5946620 #1) PC is at add_discard_addrs+0x4c/0x294 LR is at flush_sit_entries+0x5fc/0x724 ... (add_discard_addrs+0x4c/0x294) from [] (flush_sit_entries+0x5fc/0x724) (flush_sit_entries+0x5fc/0x724) from [] (write_checkpoint+0x290/0xb34) (write_checkpoint+0x290/0xb34) from [] (f2fs_gc+0xd0/0x3b4) (f2fs_gc+0xd0/0x3b4) from [] (f2fs_write_begin+0xcc/0x54c) This is because add_discard_addrs assumes that cpc->trim_start and cpc->trim_end should be correctly assigned when cpc->reason is CP_DISCARD. But, in this case, it didn't, which causes the panic in the following line. if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { ---------- This is occurred in stable-v3.X only, because two patches were changed this line in dev-v3.X but one of them was not merged in stable-v3.X. Change-Id: Ida68336c979910e13d84489b306c3b8f96cbec9a Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d9adaa92..9fef7b5c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -697,7 +697,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi) int gc_type = BG_GC; int nfree = 0; int ret = -1; - struct cp_control cpc; + struct cp_control cpc = { + .reason = CP_SYNC, + }; struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), .iroot = RADIX_TREE_INIT(GFP_NOFS), From f62c44f8b2aadbae1e63567a697020a49f5a7df0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 12 Dec 2014 13:53:41 -0800 Subject: [PATCH 095/321] f2fs: fix small discards not to issue redundantly The ckpt_valid_map and cur_valid_map are synced by seg_info_to_raw_sit. In the case of small discards, the candidates are selected before sync, while fitrim selects candidates after sync. So, for small discards, we need to add candidates only just being obsoleted. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4a7c0a9d..63585e75 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -584,7 +584,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) bool force = (cpc->reason == CP_DISCARD); int i; - if (!force && !test_opt(sbi, DISCARD)) + if (!force && (!test_opt(sbi, DISCARD) || + SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)) return; if (force && !se->valid_blocks) { @@ -612,7 +613,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = ~(cur_map[i] | ckpt_map[i]); + dmap[i] = force ? ~ckpt_map[i] : + (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); @@ -1818,7 +1820,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) se = get_seg_entry(sbi, segno); /* add discard candidates */ - if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { + if (cpc->reason != CP_DISCARD) { cpc->trim_start = segno; add_discard_addrs(sbi, cpc); } From 8d6d27b6ab30f9cf7763de427e09d67d36acfd3d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 12 Dec 2014 19:40:02 -0800 Subject: [PATCH 096/321] f2fs: remove unnecessary call to invalidate inmemory pages Now we use inmemory pages for atomic write only and provide abort procedure, we don't need to truncate them explicitly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 --- fs/f2fs/f2fs.h | 1 - fs/f2fs/segment.c | 17 ----------------- 3 files changed, 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d4883e88..a25dbee7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1116,9 +1116,6 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) if (offset % PAGE_CACHE_SIZE) return; - if (f2fs_is_atomic_file(inode)) - invalidate_inmem_page(inode, page); - if (PageDirty(page)) inode_dec_dirty_pages(inode); ClearPagePrivate(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1d7220d1..394271b1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1396,7 +1396,6 @@ void destroy_node_manager_caches(void); * segment.c */ void register_inmem_page(struct inode *, struct page *); -void invalidate_inmem_page(struct inode *, struct page *); void commit_inmem_pages(struct inode *, bool); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 63585e75..5b36104e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -264,23 +264,6 @@ void register_inmem_page(struct inode *inode, struct page *page) mutex_unlock(&fi->inmem_lock); } -void invalidate_inmem_page(struct inode *inode, struct page *page) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - struct inmem_pages *cur; - - mutex_lock(&fi->inmem_lock); - cur = radix_tree_lookup(&fi->inmem_root, page->index); - if (cur) { - radix_tree_delete(&fi->inmem_root, cur->page->index); - f2fs_put_page(cur->page, 0); - list_del(&cur->list); - kmem_cache_free(inmem_entry_slab, cur); - dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); - } - mutex_unlock(&fi->inmem_lock); -} - void commit_inmem_pages(struct inode *inode, bool abort) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); From e3817940c449ea5a7e86f1dd17c97ee46495d021 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 9 Dec 2014 14:21:46 +0800 Subject: [PATCH 097/321] f2fs: readahead contiguous current summary blocks in checkpoint Let's add readahead code for reading contiguous compact/normal summary blocks in checkpoint, then we will gain better performance in mount procedure. Changes from v1 o remove inappropriate 'unlikely' in npages_for_summary_flush. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 21 ++++++++++++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f0f2578f..121a3f36 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -923,7 +923,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->next_free_nid = cpu_to_le32(last_nid); /* 2 cp + n data seg summary + orphan inode blocks */ - data_sum_blocks = npages_for_summary_flush(sbi); + data_sum_blocks = npages_for_summary_flush(sbi, false); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 394271b1..6c774871 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1407,7 +1407,7 @@ void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void clear_prefree_segments(struct f2fs_sb_info *); void release_discard_addrs(struct f2fs_sb_info *); void discard_next_dnode(struct f2fs_sb_info *, block_t); -int npages_for_summary_flush(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *, bool); void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5b36104e..59b56279 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -784,7 +784,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, /* * Calculate the number of current summary pages for writing */ -int npages_for_summary_flush(struct f2fs_sb_info *sbi) +int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) { int valid_sum_count = 0; int i, sum_in_page; @@ -792,8 +792,13 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { if (sbi->ckpt->alloc_type[i] == SSR) valid_sum_count += sbi->blocks_per_seg; - else - valid_sum_count += curseg_blkoff(sbi, i); + else { + if (for_ra) + valid_sum_count += le16_to_cpu( + F2FS_CKPT(sbi)->cur_data_blkoff[i]); + else + valid_sum_count += curseg_blkoff(sbi, i); + } } sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - @@ -1499,12 +1504,22 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) int err; if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + int npages = npages_for_summary_flush(sbi, true); + + if (npages >= 2) + ra_meta_pages(sbi, start_sum_block(sbi), npages, + META_CP); + /* restore for compacted data summary */ if (read_compacted_summaries(sbi)) return -EINVAL; type = CURSEG_HOT_NODE; } + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) + ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), + NR_CURSEG_TYPE - type, META_CP); + for (; type <= CURSEG_COLD_NODE; type++) { err = read_normal_summaries(sbi, type); if (err) From 36ec2cdad7d7dd840507405d57aad218943e6071 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Dec 2014 17:37:21 +0800 Subject: [PATCH 098/321] f2fs: merge two uchar variable in struct node_info to reduce memory cost This patch moves one member of struct nat_entry: _flag_ to struct node_info, so _version_ in struct node_info and _flag_ which are unsigned char type will merge to one 32-bit space in register/memory. So the size of nat_entry will be reduced from 28 bytes to 24 bytes (for 64-bit machine, reduce its size from 40 bytes to 32 bytes) and then slab memory using by f2fs will be reduced. changes from v2: o update description of memory usage gain for 64-bit machine suggested by Changman Lee. changes from v1: o introduce inline copy_node_info() to copy valid data from node info suggested by Jaegeuk Kim, it can avoid bug. Reviewed-by: Changman Lee Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 33 ++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c9a23d97..bb793262 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -269,7 +269,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); - e->ni = *ni; + copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { /* @@ -277,7 +277,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, * previous nat entry can be remained in nat cache. * So, reinitialize it with new information. */ - e->ni = *ni; + copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 036b8871..fa6f9596 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -29,6 +29,14 @@ /* return value for read_node_page */ #define LOCKED_PAGE 1 +/* For flag in struct node_info */ +enum { + IS_CHECKPOINTED, /* is it checkpointed before? */ + HAS_FSYNCED_INODE, /* is the inode fsynced before? */ + HAS_LAST_FSYNC, /* has the latest node fsync mark? */ + IS_DIRTY, /* this nat entry is dirty? */ +}; + /* * For node information */ @@ -37,18 +45,11 @@ struct node_info { nid_t ino; /* inode number of the node's owner */ block_t blk_addr; /* block address of the node */ unsigned char version; /* version of the node */ -}; - -enum { - IS_CHECKPOINTED, /* is it checkpointed before? */ - HAS_FSYNCED_INODE, /* is the inode fsynced before? */ - HAS_LAST_FSYNC, /* has the latest node fsync mark? */ - IS_DIRTY, /* this nat entry is dirty? */ + unsigned char flag; /* for node information bits */ }; struct nat_entry { struct list_head list; /* for clean or dirty nat list */ - unsigned char flag; /* for node information bits */ struct node_info ni; /* in-memory node information */ }; @@ -63,20 +64,30 @@ struct nat_entry { #define inc_node_version(version) (++version) +static inline void copy_node_info(struct node_info *dst, + struct node_info *src) +{ + dst->nid = src->nid; + dst->ino = src->ino; + dst->blk_addr = src->blk_addr; + dst->version = src->version; + /* should not copy flag here */ +} + static inline void set_nat_flag(struct nat_entry *ne, unsigned int type, bool set) { unsigned char mask = 0x01 << type; if (set) - ne->flag |= mask; + ne->ni.flag |= mask; else - ne->flag &= ~mask; + ne->ni.flag &= ~mask; } static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) { unsigned char mask = 0x01 << type; - return ne->flag & mask; + return ne->ni.flag & mask; } static inline void nat_reset_flag(struct nat_entry *ne) From a862a28066cde0643aa14d380d4bc4fd6682c7e5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Dec 2014 18:29:05 +0800 Subject: [PATCH 099/321] f2fs: use ra_meta_pages to simplify readahead code in restore_node_summary Use more common function ra_meta_pages() with META_POR to readahead node blocks in restore_node_summary() instead of ra_sum_pages(), hence we can simplify the readahead code there, and also we can remove unused function ra_sum_pages(). changes from v2: o use invalidate_mapping_pages as before suggested by Changman Lee. changes from v1: o fix one bug when using truncate_inode_pages_range which is pointed out by Jaegeuk Kim. Reviewed-by: Changman Lee Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 65 ++++++++++---------------------------------------- 1 file changed, 13 insertions(+), 52 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bb793262..e10a879c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1728,80 +1728,41 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) return 0; } -/* - * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-read pages are allocated in bd_inode's mapping tree. - */ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, - int start, int nrpages) -{ - struct inode *inode = sbi->sb->s_bdev->bd_inode; - struct address_space *mapping = inode->i_mapping; - int i, page_idx = start; - struct f2fs_io_info fio = { - .type = META, - .rw = READ_SYNC | REQ_META | REQ_PRIO - }; - - for (i = 0; page_idx < start + nrpages; page_idx++, i++) { - /* alloc page in bd_inode for reading node summary info */ - pages[i] = grab_cache_page(mapping, page_idx); - if (!pages[i]) - break; - f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); - } - - f2fs_submit_merged_bio(sbi, META, READ); - return i; -} - int restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum) { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct inode *inode = sbi->sb->s_bdev->bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(sbi); - struct page *pages[bio_blocks]; - int i, idx, last_offset, nrpages, err = 0; + int i, idx, last_offset, nrpages; /* scan the node segment */ last_offset = sbi->blocks_per_seg; addr = START_BLOCK(sbi, segno); sum_entry = &sum->entries[0]; - for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { + for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { nrpages = min(last_offset - i, bio_blocks); /* readahead node pages */ - nrpages = ra_sum_pages(sbi, pages, addr, nrpages); - if (!nrpages) - return -ENOMEM; + ra_meta_pages(sbi, addr, nrpages, META_POR); - for (idx = 0; idx < nrpages; idx++) { - if (err) - goto skip; + for (idx = addr; idx < addr + nrpages; idx++) { + struct page *page = get_meta_page(sbi, idx); - lock_page(pages[idx]); - if (unlikely(!PageUptodate(pages[idx]))) { - err = -EIO; - } else { - rn = F2FS_NODE(pages[idx]); - sum_entry->nid = rn->footer.nid; - sum_entry->version = 0; - sum_entry->ofs_in_node = 0; - sum_entry++; - } - unlock_page(pages[idx]); -skip: - page_cache_release(pages[idx]); + rn = F2FS_NODE(page); + sum_entry->nid = rn->footer.nid; + sum_entry->version = 0; + sum_entry->ofs_in_node = 0; + sum_entry++; + f2fs_put_page(page, 1); } - invalidate_mapping_pages(inode->i_mapping, addr, + invalidate_mapping_pages(META_MAPPING(sbi), addr, addr + nrpages); } - return err; + return 0; } static void remove_nats_in_journal(struct f2fs_sb_info *sbi) From fc5e4668a0373059f3fca64963f0bc8178cf3863 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 19:33:13 -0800 Subject: [PATCH 100/321] f2fs: use f2fs_io_info to clean up messy parameters during IO path This patch cleans up parameters on IO paths. The key idea is to use f2fs_io_info adding a parameter, block address, and then use this structure as parameters. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 24 ++++++++-------- fs/f2fs/data.c | 66 +++++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 14 ++++++---- fs/f2fs/inline.c | 7 ++--- fs/f2fs/node.c | 14 +++++++--- fs/f2fs/segment.c | 28 +++++++++---------- 6 files changed, 87 insertions(+), 66 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 121a3f36..a2d28ff9 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -50,6 +50,11 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); struct page *page; + struct f2fs_io_info fio = { + .type = META, + .rw = READ_SYNC | REQ_META | REQ_PRIO, + .blk_addr = index, + }; repeat: page = grab_cache_page(mapping, index); if (!page) { @@ -59,8 +64,7 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) if (PageUptodate(page)) goto out; - if (f2fs_submit_page_bio(sbi, page, index, - READ_SYNC | REQ_META | REQ_PRIO)) + if (f2fs_submit_page_bio(sbi, page, &fio)) goto repeat; lock_page(page); @@ -113,14 +117,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; for (; nrpages-- > 0; blkno++) { - block_t blk_addr; if (!is_valid_blkaddr(sbi, blkno, type)) goto out; @@ -131,27 +133,27 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; /* get nat block addr */ - blk_addr = current_nat_addr(sbi, + fio.blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - blk_addr = current_sit_addr(sbi, + fio.blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); - if (blkno != start && prev_blk_addr + 1 != blk_addr) + if (blkno != start && prev_blk_addr + 1 != fio.blk_addr) goto out; - prev_blk_addr = blk_addr; + prev_blk_addr = fio.blk_addr; break; case META_SSA: case META_CP: case META_POR: - blk_addr = blkno; + fio.blk_addr = blkno; break; default: BUG(); } - page = grab_cache_page(META_MAPPING(sbi), blk_addr); + page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr); if (!page) continue; if (PageUptodate(page)) { @@ -159,7 +161,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type continue; } - f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); + f2fs_submit_page_mbio(sbi, page, &fio); f2fs_put_page(page, 0); } out: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a25dbee7..8bde2b8f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -132,14 +132,14 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, * Return unlocked page. */ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, int rw) + struct f2fs_io_info *fio) { struct bio *bio; - trace_f2fs_submit_page_bio(page, blk_addr, rw); + trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw); /* Allocate a new bio */ - bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); + bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { bio_put(bio); @@ -147,12 +147,12 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, return -EFAULT; } - submit_bio(rw, bio); + submit_bio(fio->rw, bio); return 0; } void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, struct f2fs_io_info *fio) + struct f2fs_io_info *fio) { enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; @@ -160,21 +160,21 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, io = is_read ? &sbi->read_io : &sbi->write_io[btype]; - verify_block_addr(sbi, blk_addr); + verify_block_addr(sbi, fio->blk_addr); down_write(&io->io_rwsem); if (!is_read) inc_page_count(sbi, F2FS_WRITEBACK); - if (io->bio && (io->last_block_in_bio != blk_addr - 1 || + if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 || io->fio.rw != fio->rw)) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { int bio_blocks = MAX_BIO_BLOCKS(sbi); - io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); + io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read); io->fio = *fio; } @@ -184,10 +184,10 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, goto alloc_new; } - io->last_block_in_bio = blk_addr; + io->last_block_in_bio = fio->blk_addr; up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); + trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, fio->blk_addr); } /* @@ -376,6 +376,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct dnode_of_data dn; struct page *page; int err; + struct f2fs_io_info fio = { + .type = DATA, + .rw = sync ? READ_SYNC : READA, + }; page = find_get_page(mapping, index); if (page && PageUptodate(page)) @@ -404,8 +408,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) return page; } - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, - sync ? READ_SYNC : READA); + fio.blk_addr = dn.data_blkaddr; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) return ERR_PTR(err); @@ -430,7 +434,10 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct dnode_of_data dn; struct page *page; int err; - + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + }; repeat: page = grab_cache_page(mapping, index); if (!page) @@ -464,8 +471,8 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) return page; } - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, - dn.data_blkaddr, READ_SYNC); + fio.blk_addr = dn.data_blkaddr; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) return ERR_PTR(err); @@ -515,8 +522,12 @@ struct page *get_new_data_page(struct inode *inode, zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, - dn.data_blkaddr, READ_SYNC); + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + .blk_addr = dn.data_blkaddr, + }; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) goto put_err; @@ -745,7 +756,6 @@ static int f2fs_read_data_pages(struct file *file, int do_write_data_page(struct page *page, struct f2fs_io_info *fio) { struct inode *inode = page->mapping->host; - block_t old_blkaddr, new_blkaddr; struct dnode_of_data dn; int err = 0; @@ -754,10 +764,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) if (err) return err; - old_blkaddr = dn.data_blkaddr; + fio->blk_addr = dn.data_blkaddr; /* This page is already truncated */ - if (old_blkaddr == NULL_ADDR) + if (fio->blk_addr == NULL_ADDR) goto out_writepage; set_page_writeback(page); @@ -766,14 +776,14 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (unlikely(old_blkaddr != NEW_ADDR && + if (unlikely(fio->blk_addr != NEW_ADDR && !is_cold_data(page) && need_inplace_update(inode))) { - rewrite_data_page(page, old_blkaddr, fio); + rewrite_data_page(page, fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { - write_data_page(page, &dn, &new_blkaddr, fio); - update_extent_cache(new_blkaddr, &dn); + write_data_page(page, &dn, fio); + update_extent_cache(fio->blk_addr, &dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: @@ -1007,8 +1017,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + .blk_addr = dn.data_blkaddr, + }; + err = f2fs_submit_page_bio(sbi, page, &fio); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6c774871..9e05dcd8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -490,6 +490,7 @@ enum page_type { struct f2fs_io_info { enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ + block_t blk_addr; /* block address to be written */ }; #define is_read_io(rw) (((rw) & 1) == READ) @@ -1413,10 +1414,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_io_info *, unsigned int, block_t, block_t *); -void write_data_page(struct page *, struct dnode_of_data *, block_t *, - struct f2fs_io_info *); -void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); + unsigned int, struct f2fs_io_info *); +void write_data_page(struct page *, struct dnode_of_data *, + struct f2fs_io_info *); +void rewrite_data_page(struct page *, struct f2fs_io_info *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void allocate_data_block(struct f2fs_sb_info *, struct page *, @@ -1463,8 +1464,9 @@ void destroy_checkpoint_caches(void); * data.c */ void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); -int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); -void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, +int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, + struct f2fs_io_info *); +void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4674392e..bb4f3517 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -79,7 +79,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { void *src_addr, *dst_addr; - block_t new_blk_addr; struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, @@ -115,9 +114,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* write data page to try to make data consistent */ set_page_writeback(page); - - write_data_page(page, dn, &new_blk_addr, &fio); - update_extent_cache(new_blk_addr, dn); + fio.blk_addr = dn->data_blkaddr; + write_data_page(page, dn, &fio); + update_extent_cache(fio.blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) inode_dec_dirty_pages(dn->inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e10a879c..62654bb1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -977,6 +977,10 @@ static int read_node_page(struct page *page, int rw) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; + struct f2fs_io_info fio = { + .type = NODE, + .rw = rw, + }; get_node_info(sbi, page->index, &ni); @@ -988,7 +992,8 @@ static int read_node_page(struct page *page, int rw) if (PageUptodate(page)) return LOCKED_PAGE; - return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw); + fio.blk_addr = ni.blk_addr; + return f2fs_submit_page_bio(sbi, page, &fio); } /* @@ -1271,7 +1276,6 @@ static int f2fs_write_node_page(struct page *page, { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; - block_t new_addr; struct node_info ni; struct f2fs_io_info fio = { .type = NODE, @@ -1306,9 +1310,11 @@ static int f2fs_write_node_page(struct page *page, } else { down_read(&sbi->node_write); } + set_page_writeback(page); - write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); - set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); + fio.blk_addr = ni.blk_addr; + write_node_page(sbi, page, nid, &fio); + set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); unlock_page(page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 59b56279..293b34b1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1241,39 +1241,39 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, struct f2fs_io_info *fio) + struct f2fs_summary *sum, + struct f2fs_io_info *fio) { int type = __get_segment_type(page, fio->type); - allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); + allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); + f2fs_submit_page_mbio(sbi, page, fio); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_io_info fio = { .type = META, - .rw = WRITE_SYNC | REQ_META | REQ_PRIO + .rw = WRITE_SYNC | REQ_META | REQ_PRIO, + .blk_addr = page->index, }; set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, page->index, &fio); + f2fs_submit_page_mbio(sbi, page, &fio); } void write_node_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_io_info *fio, - unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) + unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio); + do_write_page(sbi, page, &sum, fio); } void write_data_page(struct page *page, struct dnode_of_data *dn, - block_t *new_blkaddr, struct f2fs_io_info *fio) + struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; @@ -1282,14 +1282,12 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - - do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio); + do_write_page(sbi, page, &sum, fio); } -void rewrite_data_page(struct page *page, block_t old_blkaddr, - struct f2fs_io_info *fio) +void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) { - f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); + f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio); } void recover_data_page(struct f2fs_sb_info *sbi, From 436d9314c36892e8a328129930acc6272c1c5b85 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 18 Dec 2014 19:32:36 -0800 Subject: [PATCH 101/321] f2fs: avoid double lock for cp_rwsem The __f2fs_add_link is covered by cp_rwsem all the time. This calls init_inode_metadata, which conducts some acl operations including memory allocation with GFP_KERNEL previously. But, under memory pressure, f2fs_write_data_page can be called, which also grabs cp_rwsem too. In this case, this incurs a deadlock pointed by Chao. Thread #1 Thread #2 down_read down_write down_read -> here down_read should wait forever. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index c6aa0418..5b952c05 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); @@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) int i; f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * - sizeof(struct f2fs_acl_entry), GFP_KERNEL); + sizeof(struct f2fs_acl_entry), GFP_NOFS); if (!f2fs_acl) return ERR_PTR(-ENOMEM); From 63e152598ebbe13f58795d92ed2fab5ed52481c3 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Wed, 24 Dec 2014 02:16:54 +0900 Subject: [PATCH 102/321] f2fs: add block count by in-place-update in stat info This patch adds block count by in-place-update in stat. Signed-off-by: Changman Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++++ fs/f2fs/f2fs.h | 6 +++++- fs/f2fs/segment.c | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 91e8f699..2b642215 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -79,6 +79,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; } + + si->inplace_count = atomic_read(&sbi->inplace_count); } /* @@ -277,6 +279,7 @@ static int stat_show(struct seq_file *s, void *v) for (j = 0; j < si->util_free; j++) seq_putc(s, '-'); seq_puts(s, "]\n\n"); + seq_printf(s, "IPU: %u blocks\n", si->inplace_count); seq_printf(s, "SSR: %u blocks in %u segments\n", si->block_count[SSR], si->segment_count[SSR]); seq_printf(s, "LFS: %u blocks in %u segments\n", @@ -331,6 +334,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); + atomic_set(&sbi->inplace_count, 0); mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9e05dcd8..a297af5c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -591,6 +591,7 @@ struct f2fs_sb_info { struct f2fs_stat_info *stat_info; /* FS status information */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ + atomic_t inplace_count; /* # of inplace update */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ @@ -1522,6 +1523,7 @@ struct f2fs_stat_info { unsigned int segment_count[2]; unsigned int block_count[2]; + unsigned int inplace_count; unsigned base_mem, cache_mem; }; @@ -1561,7 +1563,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ ((sbi)->block_count[(curseg)->alloc_type]++) - +#define stat_inc_inplace_blocks(sbi) \ + (atomic_inc(&(sbi)->inplace_count)) #define stat_inc_seg_count(sbi, type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ @@ -1607,6 +1610,7 @@ void f2fs_destroy_root_stats(void); #define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) +#define stat_inc_inplace_blocks(sbi) #define stat_inc_seg_count(si, type) #define stat_inc_tot_blk_count(si, blks) #define stat_inc_data_blk_count(si, blks) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 293b34b1..5440d4a7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1287,6 +1287,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) { + stat_inc_inplace_blocks(F2FS_P_SB(page)); f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio); } From 7ebfe7277aa558f23479ce5b1bf6133233822b66 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 23 Dec 2014 16:26:31 -0800 Subject: [PATCH 103/321] f2fs: fix missing cold bit during recovery In do_recover_data, we find and update previous node pages after updating its new block addresses. After then, we call fill_node_footer without reset field, we erase its cold bit so that this new cold node block is written to wrong log area. This patch fixes not to miss its old flag. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 10 +++++++++- include/linux/f2fs_fs.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index fa6f9596..cac8a3d9 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -212,11 +212,19 @@ static inline void fill_node_footer(struct page *page, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { struct f2fs_node *rn = F2FS_NODE(page); + unsigned int old_flag = 0; + if (reset) memset(rn, 0, sizeof(*rn)); + else + old_flag = le32_to_cpu(rn->footer.flag); + rn->footer.nid = cpu_to_le32(nid); rn->footer.ino = cpu_to_le32(ino); - rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); + + /* should remain old flag bits such as COLD_BIT_SHIFT */ + rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) | + (old_flag & OFFSET_BIT_MASK)); } static inline void copy_node_footer(struct page *dst, struct page *src) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index cc1064f3..0db7b366 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -228,6 +228,8 @@ enum { OFFSET_BIT_SHIFT }; +#define OFFSET_BIT_MASK (0x07) /* (0x01 << OFFSET_BIT_SHIFT) - 1 */ + struct node_footer { __le32 nid; /* node id */ __le32 ino; /* inode nunmber */ From edcd8ff2d2957f5f0da01c523313e259f4e19f2a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 23 Dec 2014 16:35:21 +0800 Subject: [PATCH 104/321] f2fs: cleanup trace event of f2fs_submit_page_{m,}bio with DECLARE_EVENT_CLASS This patch adds missing parameter _type_ for trace_f2fs_submit_page_bio, then use DECLARE_EVENT_CLASS/DEFINE_EVENT_CONDITION pair to cleanup some trace event code related to f2fs_submit_page_{m,}bio. Additionally, after we remove redundant code, size of code can be reduced: text data bss dec hex filename 176787 8712 56 185555 2d4d3 f2fs.ko.org 174408 8648 56 183112 2cb48 f2fs.ko Change-Id: Ib3bb01630d2b13191e2e67c6a7de9c49d2a96cd0 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +- include/trace/events/f2fs.h | 115 ++++++++++++++++-------------------- 2 files changed, 53 insertions(+), 66 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8bde2b8f..378da02f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -136,7 +136,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, { struct bio *bio; - trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw); + trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw, fio->type); /* Allocate a new bio */ bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); @@ -187,7 +187,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, io->last_block_in_bio = fio->blk_addr; up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, fio->blk_addr); + trace_f2fs_submit_page_mbio(page, fio->blk_addr, fio->rw, fio->type); } /* diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index e86c1eaf..32843555 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -440,38 +440,6 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, __entry->err) ); -TRACE_EVENT_CONDITION(f2fs_submit_page_bio, - - TP_PROTO(struct page *page, sector_t blkaddr, int type), - - TP_ARGS(page, blkaddr, type), - - TP_CONDITION(page->mapping), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(pgoff_t, index) - __field(sector_t, blkaddr) - __field(int, type) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->index = page->index; - __entry->blkaddr = blkaddr; - __entry->type = type; - ), - - TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "blkaddr = 0x%llx, bio_type = %s%s", - show_dev_ino(__entry), - (unsigned long)__entry->index, - (unsigned long long)__entry->blkaddr, - show_bio_type(__entry->type)) -); - TRACE_EVENT(f2fs_get_data_block, TP_PROTO(struct inode *inode, sector_t iblock, struct buffer_head *bh, int ret), @@ -680,6 +648,57 @@ TRACE_EVENT(f2fs_reserve_new_block, __entry->ofs_in_node) ); +DECLARE_EVENT_CLASS(f2fs__submit_page_bio, + + TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + + TP_ARGS(page, blkaddr, rw, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, index) + __field(block_t, blkaddr) + __field(int, rw) + __field(int, type) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; + __entry->blkaddr = blkaddr; + __entry->rw = rw; + __entry->type = type; + ), + + TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + "blkaddr = 0x%llx, rw = %s%s, type = %s", + show_dev_ino(__entry), + (unsigned long)__entry->index, + (unsigned long long)__entry->blkaddr, + show_bio_type(__entry->rw), + show_block_type(__entry->type)) +); + +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, + + TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + + TP_ARGS(page, blkaddr, rw, type), + + TP_CONDITION(page->mapping) +); + +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, + + TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + + TP_ARGS(page, blkaddr, rw, type), + + TP_CONDITION(page->mapping) +); + DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), @@ -913,38 +932,6 @@ TRACE_EVENT(f2fs_writepages, __entry->range_cyclic) ); -TRACE_EVENT(f2fs_submit_page_mbio, - - TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), - - TP_ARGS(page, rw, type, blk_addr), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(int, rw) - __field(int, type) - __field(pgoff_t, index) - __field(block_t, block) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->rw = rw; - __entry->type = type; - __entry->index = page->index; - __entry->block = blk_addr; - ), - - TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx", - show_dev_ino(__entry), - show_bio_type(__entry->rw), - show_block_type(__entry->type), - (unsigned long)__entry->index, - (unsigned long long)__entry->block) -); - TRACE_EVENT(f2fs_write_checkpoint, TP_PROTO(struct super_block *sb, int reason, char *msg), From ec02cf26922a06e46c8eead2424fc0705d1790f1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Dec 2014 16:08:14 +0800 Subject: [PATCH 105/321] f2fs: cleanup parameters for trace_f2fs_submit_{read_,write_,page_,page_m}bio with fio Cleanup parameters for trace_f2fs_submit_{read_,write_,page_,page_m}bio with fio as one parameter. Suggested-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: include/trace/events/f2fs.h Change-Id: I7ea9f234514c1dd80def7e801b34f70d1df55a05 --- fs/f2fs/data.c | 10 ++++------ include/trace/events/f2fs.h | 37 ++++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 378da02f..3bcb57e2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -95,11 +95,9 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) return; if (is_read_io(fio->rw)) - trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, - fio->type, io->bio); + trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); else - trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, - fio->type, io->bio); + trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); submit_bio(fio->rw, io->bio); io->bio = NULL; @@ -136,7 +134,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, { struct bio *bio; - trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw, fio->type); + trace_f2fs_submit_page_bio(page, fio); /* Allocate a new bio */ bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); @@ -187,7 +185,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, io->last_block_in_bio = fio->blk_addr; up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->blk_addr, fio->rw, fio->type); + trace_f2fs_submit_page_mbio(page, fio); } /* diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 32843555..a3e6dd4b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -650,9 +650,9 @@ TRACE_EVENT(f2fs_reserve_new_block, DECLARE_EVENT_CLASS(f2fs__submit_page_bio, - TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + TP_PROTO(struct page *page, struct f2fs_io_info *fio), - TP_ARGS(page, blkaddr, rw, type), + TP_ARGS(page, fio), TP_STRUCT__entry( __field(dev_t, dev) @@ -667,9 +667,9 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __entry->dev = page->mapping->host->i_sb->s_dev; __entry->ino = page->mapping->host->i_ino; __entry->index = page->index; - __entry->blkaddr = blkaddr; - __entry->rw = rw; - __entry->type = type; + __entry->blkaddr = fio->blk_addr; + __entry->rw = fio->rw; + __entry->type = fio->type; ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " @@ -683,27 +683,28 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, - TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + TP_PROTO(struct page *page, struct f2fs_io_info *fio), - TP_ARGS(page, blkaddr, rw, type), + TP_ARGS(page, fio), TP_CONDITION(page->mapping) ); DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, - TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + TP_PROTO(struct page *page, struct f2fs_io_info *fio), - TP_ARGS(page, blkaddr, rw, type), + TP_ARGS(page, fio), TP_CONDITION(page->mapping) ); DECLARE_EVENT_CLASS(f2fs__submit_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_STRUCT__entry( __field(dev_t, dev) @@ -715,8 +716,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->rw = rw; - __entry->type = type; + __entry->rw = fio->rw; + __entry->type = fio->type; __entry->sector = bio->bi_sector; __entry->size = bio->bi_size; ), @@ -731,18 +732,20 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_CONDITION(bio) ); DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_CONDITION(bio) ); From 92a43f318b2355c7e3c502ae1ca33d8000d1acb7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 29 Dec 2014 15:56:18 +0800 Subject: [PATCH 106/321] f2fs: reuse inode_entry_slab in gc procedure for using slab more effectively There are two slab cache inode_entry_slab and winode_slab using the same structure as below: struct dir_inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ }; struct inode_entry { struct list_head list; struct inode *inode; }; It's a little waste that the two cache can not share their memory space for each other. So in this patch we remove one redundant winode_slab slab cache, then use more universal name struct inode_entry as remaining data structure name of slab, finally we reuse the inode_entry_slab to store dirty dir item and gc item for more effective. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 18 +++++++++--------- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 14 +++++++++----- fs/f2fs/gc.c | 20 ++------------------ fs/f2fs/gc.h | 7 ++----- fs/f2fs/super.c | 8 +------- 6 files changed, 24 insertions(+), 45 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a2d28ff9..e7e1f7ac 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -23,7 +23,7 @@ #include static struct kmem_cache *ino_entry_slab; -static struct kmem_cache *inode_entry_slab; +struct kmem_cache *inode_entry_slab; /* * We guarantee no failure on the returned page. @@ -672,7 +672,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) return -EINVAL; } -static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) +static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -689,7 +689,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new; + struct inode_entry *new; int ret = 0; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) @@ -718,7 +718,7 @@ void update_dirty_page(struct inode *inode, struct page *page) void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new = + struct inode_entry *new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); int ret = 0; @@ -736,7 +736,7 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *entry; + struct inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; @@ -766,7 +766,7 @@ void remove_dirty_dir_inode(struct inode *inode) void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; - struct dir_inode_entry *entry; + struct inode_entry *entry; struct inode *inode; retry: if (unlikely(f2fs_cp_error(sbi))) @@ -779,7 +779,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) spin_unlock(&sbi->dir_inode_lock); return; } - entry = list_entry(head->next, struct dir_inode_entry, list); + entry = list_entry(head->next, struct inode_entry, list); inode = igrab(entry->inode); spin_unlock(&sbi->dir_inode_lock); if (inode) { @@ -1105,8 +1105,8 @@ int __init create_checkpoint_caches(void) sizeof(struct ino_entry)); if (!ino_entry_slab) return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", - sizeof(struct dir_inode_entry)); + inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", + sizeof(struct inode_entry)); if (!inode_entry_slab) { kmem_cache_destroy(ino_entry_slab); return -ENOMEM; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 2b642215..dd7835b2 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -172,7 +172,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a297af5c..fae860b9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -135,8 +135,14 @@ struct ino_entry { nid_t ino; /* inode number */ }; -/* for the list of directory inodes */ -struct dir_inode_entry { +/* + * for the list of directory inodes or gc inodes. + * NOTE: there are two slab users for this structure, if we add/modify/delete + * fields in structure for one of slab users, it may affect fields or size of + * other one, in this condition, it's better to split both of slab and related + * data structure. + */ +struct inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ }; @@ -296,7 +302,7 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ - struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct inode_entry *dirty_dir; /* the pointer of dirty dir */ struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ @@ -1486,8 +1492,6 @@ void stop_gc_thread(struct f2fs_sb_info *); block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); int f2fs_gc(struct f2fs_sb_info *); void build_gc_manager(struct f2fs_sb_info *); -int __init create_gc_caches(void); -void destroy_gc_caches(void); /* * recovery.c diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9fef7b5c..429ff218 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -24,8 +24,6 @@ #include "gc.h" #include -static struct kmem_cache *winode_slab; - static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; @@ -356,7 +354,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); + new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); new_ie->inode = inode; retry: if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { @@ -373,7 +371,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list) radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); - kmem_cache_free(winode_slab, ie); + kmem_cache_free(inode_entry_slab, ie); } } @@ -750,17 +748,3 @@ void build_gc_manager(struct f2fs_sb_info *sbi) { DIRTY_I(sbi)->v_ops = &default_v_ops; } - -int __init create_gc_caches(void) -{ - winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", - sizeof(struct inode_entry)); - if (!winode_slab) - return -ENOMEM; - return 0; -} - -void destroy_gc_caches(void) -{ - kmem_cache_destroy(winode_slab); -} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 6ff7ad38..524543a6 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -35,16 +35,13 @@ struct f2fs_gc_kthread { unsigned int gc_idle; }; -struct inode_entry { - struct list_head list; - struct inode *inode; -}; - struct gc_inode_list { struct list_head ilist; struct radix_tree_root iroot; }; +extern struct kmem_cache *inode_entry_slab; + /* * inline functions */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7e23f005..a6a8568f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1222,12 +1222,9 @@ static int __init init_f2fs_fs(void) err = create_segment_manager_caches(); if (err) goto free_node_manager_caches; - err = create_gc_caches(); - if (err) - goto free_segment_manager_caches; err = create_checkpoint_caches(); if (err) - goto free_gc_caches; + goto free_segment_manager_caches; f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); if (!f2fs_kset) { err = -ENOMEM; @@ -1244,8 +1241,6 @@ static int __init init_f2fs_fs(void) kset_unregister(f2fs_kset); free_checkpoint_caches: destroy_checkpoint_caches(); -free_gc_caches: - destroy_gc_caches(); free_segment_manager_caches: destroy_segment_manager_caches(); free_node_manager_caches: @@ -1262,7 +1257,6 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); destroy_checkpoint_caches(); - destroy_gc_caches(); destroy_segment_manager_caches(); destroy_node_manager_caches(); destroy_inodecache(); From 039ae1b0e988730bad9c6b1a2400d00687995834 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Dec 2014 22:57:55 -0800 Subject: [PATCH 107/321] f2fs: clean up to remove parameter This patch uses dn->data_blkaddr as a parameter for the destination block address. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 27 ++++++++++++++------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 3 ++- fs/f2fs/inline.c | 2 +- fs/f2fs/recovery.c | 3 ++- fs/f2fs/segment.c | 1 + 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3bcb57e2..9c777311 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -194,7 +194,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, * ->node_page * update block addresses in the node page */ -static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +static void __set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn; __le32 *addr_array; @@ -207,7 +207,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); - addr_array[ofs_in_node] = cpu_to_le32(new_addr); + addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); set_page_dirty(node_page); } @@ -222,8 +222,8 @@ int reserve_new_block(struct dnode_of_data *dn) trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); - __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; + __set_data_blkaddr(dn); mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; @@ -288,19 +288,19 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } -void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +void update_extent_cache(struct dnode_of_data *dn) { struct f2fs_inode_info *fi = F2FS_I(dn->inode); pgoff_t fofs, start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; int need_update = true; - f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); + f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; /* Update the page address in the parent node */ - __set_data_blkaddr(dn, blk_addr); + __set_data_blkaddr(dn); if (is_inode_flag_set(fi, FI_NO_EXTENT)) return; @@ -318,16 +318,16 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) /* Initial extent */ if (fi->ext.len == 0) { - if (blk_addr != NULL_ADDR) { + if (dn->data_blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk_addr = blk_addr; + fi->ext.blk_addr = dn->data_blkaddr; fi->ext.len = 1; } goto end_update; } /* Front merge */ - if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { fi->ext.fofs--; fi->ext.blk_addr--; fi->ext.len++; @@ -335,7 +335,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) } /* Back merge */ - if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { fi->ext.len++; goto end_update; } @@ -569,8 +569,8 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; - __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; + __set_data_blkaddr(dn); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); @@ -581,7 +581,8 @@ static int __allocate_data_block(struct dnode_of_data *dn) /* direct IO doesn't use extent cache to maximize the performance */ set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); - update_extent_cache(new_blkaddr, dn); + dn->data_blkaddr = new_blkaddr; + update_extent_cache(dn); clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); /* update i_size */ @@ -781,7 +782,7 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { write_data_page(page, &dn, fio); - update_extent_cache(fio->blk_addr, &dn); + update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fae860b9..e5b1f084 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1477,7 +1477,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); -void update_extent_cache(block_t, struct dnode_of_data *); +void update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 93a92b33..90fe3b52 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -440,7 +440,8 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) if (blkaddr == NULL_ADDR) continue; - update_extent_cache(NULL_ADDR, dn); + dn->data_blkaddr = NULL_ADDR; + update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); nr_free++; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index bb4f3517..020c7169 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -116,7 +116,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; write_data_page(page, dn, &fio); - update_extent_cache(fio.blk_addr, dn); + update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) inode_dec_dirty_pages(dn->inode); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 34be4d76..2b48b6c1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -396,7 +396,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); - update_extent_cache(dest, &dn); + dn.data_blkaddr = dest; + update_extent_cache(&dn); recovered++; } dn.ofs_in_node++; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5440d4a7..2bb8c79d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1283,6 +1283,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(sbi, page, &sum, fio); + dn->data_blkaddr = fio->blk_addr; } void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) From 3ca3321aba331ccdc5f53a3be800903a271c2ce4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Dec 2014 23:08:26 -0800 Subject: [PATCH 108/321] f2fs: avoid potential unnecessary codes This patch relocates some operations to avoid unnecessary execution. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- fs/f2fs/node.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9c777311..708da0b9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -296,8 +296,6 @@ void update_extent_cache(struct dnode_of_data *dn) int need_update = true; f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; /* Update the page address in the parent node */ __set_data_blkaddr(dn); @@ -305,6 +303,9 @@ void update_extent_cache(struct dnode_of_data *dn) if (is_inode_flag_set(fi, FI_NO_EXTENT)) return; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + write_lock(&fi->ext.ext_lock); start_fofs = fi->ext.fofs; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 62654bb1..64073b02 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -347,7 +347,6 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) struct nat_entry *e; int i; - memset(&ne, 0, sizeof(struct f2fs_nat_entry)); ni->nid = nid; /* Check nat cache */ @@ -362,6 +361,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) if (e) return; + memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + /* Check current segment summary */ mutex_lock(&curseg->curseg_mutex); i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); From 5d6ba37e52c68a30cacd9768fa0674a6d1338781 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Dec 2014 23:12:11 -0800 Subject: [PATCH 109/321] f2fs: remove uncovered code path This patch removes unnecessary function calls. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 708da0b9..c438fd5a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -560,31 +560,22 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; - block_t new_blkaddr; struct node_info ni; pgoff_t fofs; - int type; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; - dn->data_blkaddr = NEW_ADDR; - __set_data_blkaddr(dn); - get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - type = CURSEG_WARM_DATA; - - allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); + allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, + CURSEG_WARM_DATA); /* direct IO doesn't use extent cache to maximize the performance */ - set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); - dn->data_blkaddr = new_blkaddr; - update_extent_cache(dn); - clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); + __set_data_blkaddr(dn); /* update i_size */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + @@ -592,7 +583,6 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); - dn->data_blkaddr = new_blkaddr; return 0; } From 14ded60ce00205e2863ff16b169b712c6a8ec0ec Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 5 Jan 2015 16:02:20 -0800 Subject: [PATCH 110/321] f2fs: align direct_io'ed data to section This patch aligns the start block address of a file for direct io to the f2fs's section size. Some flash devices manage an over 4KB-sized page as a write unit, and if the direct_io'ed data are written but not aligned to that unit, the performance can be degraded due to the partial page copies. Thus, since f2fs has a section that is well aligned to FTL units, we can align the block address to the section size so that f2fs avoids this misalignment. Change-Id: I6672b5c86b0d64986802b2b3dd1714e1611dc5cd Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++++-- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/segment.c | 27 +++++++++++++++++++-------- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c438fd5a..391ff7a0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -561,6 +561,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; struct node_info ni; + int seg = CURSEG_WARM_DATA; pgoff_t fofs; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) @@ -571,8 +572,10 @@ static int __allocate_data_block(struct dnode_of_data *dn) get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, - CURSEG_WARM_DATA); + if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) + seg = CURSEG_DIRECT_IO; + + allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ __set_data_blkaddr(dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e5b1f084..10332069 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -405,7 +405,8 @@ enum { CURSEG_HOT_NODE, /* direct node blocks of directory files */ CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ - NO_CHECK_TYPE + NO_CHECK_TYPE, + CURSEG_DIRECT_IO, /* to use for the direct IO path */ }; struct flush_cmd { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2bb8c79d..9b0cd93b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1081,18 +1081,22 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } +static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int old_segno; + + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + locate_dirty_segment(sbi, old_segno); +} + void allocate_new_segments(struct f2fs_sb_info *sbi) { - struct curseg_info *curseg; - unsigned int old_curseg; int i; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - curseg = CURSEG_I(sbi, i); - old_curseg = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_curseg); - } + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) + __allocate_new_segments(sbi, i); } static const struct segment_allocation default_salloc_ops = { @@ -1205,11 +1209,18 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; + bool direct_io = (type == CURSEG_DIRECT_IO); + + type = direct_io ? CURSEG_WARM_DATA : type; curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); + /* direct_io'ed data is aligned to the segment for better performance */ + if (direct_io && curseg->next_blkoff) + __allocate_new_segments(sbi, type); + *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); /* From 921edeb97c8ca8dea3206eec800135b2ce3454c3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 Jan 2015 14:28:43 +0800 Subject: [PATCH 111/321] f2fs: get rid of kzalloc in __recover_inline_status We use kzalloc to allocate memory in __recover_inline_status, and use this all-zero memory to check the inline date content of inode page by comparing them. This is low effective and not needed, let's check inline date content directly. Signed-off-by: Chao Yu [Jaegeuk Kim: make the code more neat] Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index eac64091..12028d9b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -66,29 +66,23 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static int __recover_inline_status(struct inode *inode, struct page *ipage) +static void __recover_inline_status(struct inode *inode, struct page *ipage) { void *inline_data = inline_data_addr(ipage); - struct f2fs_inode *ri; - void *zbuf; + __le32 *start = inline_data; + __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32); - zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); - if (!zbuf) - return -ENOMEM; + while (start < end) { + if (*start++) { + f2fs_wait_on_page_writeback(ipage, NODE); - if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { - kfree(zbuf); - return 0; + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage)); + set_page_dirty(ipage); + return; + } } - kfree(zbuf); - - f2fs_wait_on_page_writeback(ipage, NODE); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); - - ri = F2FS_INODE(ipage); - set_raw_inline(F2FS_I(inode), ri); - set_page_dirty(ipage); - return 0; + return; } static int do_read_inode(struct inode *inode) @@ -97,7 +91,6 @@ static int do_read_inode(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; - int err = 0; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -141,7 +134,7 @@ static int do_read_inode(struct inode *inode) /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) - err = __recover_inline_status(inode, node_page); + __recover_inline_status(inode, node_page); /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); @@ -151,7 +144,7 @@ static int do_read_inode(struct inode *inode) stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); - return err; + return 0; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) From 03972dc984e8c14086a5dc9faadf317f45806802 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Jan 2015 16:00:03 -0800 Subject: [PATCH 112/321] f2fs: fix wrong unlock_page call This patch removes wrongly called unlock_page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 391ff7a0..ef510e14 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -834,7 +834,6 @@ static int f2fs_write_data_page(struct page *page, /* we should bypass data pages to proceed the kworkder jobs */ if (unlikely(f2fs_cp_error(sbi))) { SetPageError(page); - unlock_page(page); goto out; } From ff58225f32085219320c83e77cf98ebc43e28cc2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Jan 2015 10:47:57 -0800 Subject: [PATCH 113/321] f2fs: free radix_tree_nodes used by nat_set entries In the normal case, the radix_tree_nodes are freed successfully. But, when cp_error was detected, we should destroy them forcefully. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 21 +++++++++++++++++++-- fs/f2fs/node.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 64073b02..1b3f9e9f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1893,7 +1893,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - struct nat_entry_set *setvec[NATVEC_SIZE]; + struct nat_entry_set *setvec[SETVEC_SIZE]; struct nat_entry_set *set, *tmp; unsigned int found; nid_t set_idx = 0; @@ -1910,7 +1910,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, - set_idx, NATVEC_SIZE, setvec))) { + set_idx, SETVEC_SIZE, setvec))) { unsigned idx; set_idx = setvec[found - 1]->set + 1; for (idx = 0; idx < found; idx++) @@ -1990,6 +1990,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next_i; struct nat_entry *natvec[NATVEC_SIZE]; + struct nat_entry_set *setvec[SETVEC_SIZE]; nid_t nid = 0; unsigned int found; @@ -2014,11 +2015,27 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; + nid = nat_get_nid(natvec[found - 1]) + 1; for (idx = 0; idx < found; idx++) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); + + /* destroy nat set cache */ + nid = 0; + while ((found = __gang_lookup_nat_set(nm_i, + nid, SETVEC_SIZE, setvec))) { + unsigned idx; + + nid = setvec[found - 1]->set + 1; + for (idx = 0; idx < found; idx++) { + /* entry_cnt is not zero, when cp_error was occurred */ + f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list)); + radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set); + kmem_cache_free(nat_entry_set_slab, setvec[idx]); + } + } up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index cac8a3d9..f405bbf2 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -25,6 +25,7 @@ /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 +#define SETVEC_SIZE 32 /* return value for read_node_page */ #define LOCKED_PAGE 1 From fba40baf1b687c4a875ac602e8fe05b32d86ff45 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Jan 2015 11:09:37 -0800 Subject: [PATCH 114/321] f2fs: add nat/sit entries into status This patch adds NAT/SIT entry informations. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 8 +++++--- fs/f2fs/f2fs.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index dd7835b2..1f0fb580 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -57,7 +57,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->node_pages = NODE_MAPPING(sbi)->nrpages; si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; - si->sits = SIT_I(sbi)->dirty_sentries; + si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; + si->sits = MAIN_SEGS(sbi); + si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->fnids = NM_I(sbi)->fcnt; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -260,8 +262,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_dent, si->ndirty_dirs); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); - seq_printf(s, " - NATs: %9d\n - SITs: %9d\n", - si->nats, si->sits); + seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", + si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d\n", si->fnids); seq_puts(s, "\nDistribution of User Blocks:"); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 10332069..2a89c83d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1511,7 +1511,7 @@ struct f2fs_stat_info { int main_area_segs, main_area_sections, main_area_zones; int hit_ext, total_ext; int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; - int nats, sits, fnids; + int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; int bg_gc, inline_inode, inline_dir, inmem_pages; unsigned int valid_count, valid_node_count, valid_inode_count; From b181c553caa550806621b28d0fcac3041f2ccccb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Jan 2015 16:27:17 -0800 Subject: [PATCH 115/321] f2fs: avoid infinite loop on cp_error If cp_error is set, we should avoid all the infinite loop. In f2fs_sync_file, there is a hole, and this patch fixes that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 90fe3b52..87e76864 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -240,6 +240,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) sync_nodes: sync_node_pages(sbi, ino, &wbc); + /* if cp_error was enabled, we should avoid infinite loop */ + if (unlikely(f2fs_cp_error(sbi))) + goto out; + if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); From abc6af81569a084add0857c4011267bb29984440 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 10 Jan 2015 20:09:52 +0800 Subject: [PATCH 116/321] f2fs: fix wrong memory footprint statistics in debugfs Our value of memory footprint statistics showed in debugfs is not calculated correctly. Fix it in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 1f0fb580..b45daab9 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -163,13 +163,20 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); +get_cache: + si->cache_mem = 0; + /* build gc */ - si->base_mem += sizeof(struct f2fs_gc_kthread); + if (sbi->gc_thread) + si->cache_mem += sizeof(struct f2fs_gc_kthread); + + /* build merge flush thread */ + if (SM_I(sbi)->cmd_control_info) + si->cache_mem += sizeof(struct flush_cmd_control); -get_cache: /* free nids */ - si->cache_mem = NM_I(sbi)->fcnt; - si->cache_mem += NM_I(sbi)->nat_cnt; + si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid); + si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); npages = NODE_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; From ed0f0a6e9d96485c1dbe858b14a6088d11e2b12b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 10 Jan 2015 21:37:36 -0800 Subject: [PATCH 117/321] f2fs: update memory footprint information This patch adds missing memory usages, and splits them in detail. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 24 +++++++++++++++++------- fs/f2fs/f2fs.h | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b45daab9..0f721f6a 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -177,13 +177,18 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* free nids */ si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid); si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); - npages = NODE_MAPPING(sbi)->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; - npages = META_MAPPING(sbi)->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; + si->cache_mem += NM_I(sbi)->dirty_nat_cnt * + sizeof(struct nat_entry_set); + si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); + + si->page_mem = 0; + npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += npages << PAGE_CACHE_SHIFT; + npages = META_MAPPING(sbi)->nrpages; + si->page_mem += npages << PAGE_CACHE_SHIFT; } static int stat_show(struct seq_file *s, void *v) @@ -301,9 +306,14 @@ static int stat_show(struct seq_file *s, void *v) /* memory footprint */ update_mem_info(si->sbi); - seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", - (si->base_mem + si->cache_mem) >> 10, - si->base_mem >> 10, si->cache_mem >> 10); + seq_printf(s, "\nMemory: %u KB\n", + (si->base_mem + si->cache_mem + si->page_mem) >> 10); + seq_printf(s, " - static: %u KB\n", + si->base_mem >> 10); + seq_printf(s, " - cached: %u KB\n", + si->cache_mem >> 10); + seq_printf(s, " - paged : %u KB\n", + si->page_mem >> 10); } mutex_unlock(&f2fs_stat_mutex); return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2a89c83d..31fc6703 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1529,7 +1529,7 @@ struct f2fs_stat_info { unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; - unsigned base_mem, cache_mem; + unsigned base_mem, cache_mem, page_mem; }; static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) From 8c6bf905d34b8b27bd9a2a45f802f530c5bd788b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Jan 2015 16:34:24 -0800 Subject: [PATCH 118/321] f2fs: trigger correct checkpoint during umount This patch fixes to trigger checkpoint with umount flag when kill_sb was called. In kill_sb, f2fs_sync_fs was finally called, but at this time, f2fs can't do checkpoint with CP_UMOUNT. After then, f2fs_put_super is not doing checkpoint, since it is not dirty. So, this patch adds a flag to indicate f2fs_sync_fs is called during umount. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/super.c Change-Id: Ib3cd5e80c0b713743725810c54195c1aee60348a --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 31fc6703..8525cd88 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -524,6 +524,7 @@ struct f2fs_sb_info { struct f2fs_super_block *raw_super; /* raw super block pointer */ int s_dirty; /* dirty flag for checkpoint */ bool need_fsck; /* need fsck.f2fs to fix */ + bool s_closing; /* specify unmounting */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6a8568f..2c9de6c2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -479,9 +479,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); if (sync) { - struct cp_control cpc = { - .reason = CP_SYNC, - }; + struct cp_control cpc; + + cpc.reason = (sbi->s_closing) ? CP_UMOUNT : CP_SYNC; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); @@ -1181,11 +1181,18 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); } +static void kill_f2fs_super(struct super_block *sb) +{ + if (sb->s_root) + F2FS_SB(sb)->s_closing = true; + kill_block_super(sb); +} + static struct file_system_type f2fs_fs_type = { .owner = THIS_MODULE, .name = "f2fs", .mount = f2fs_mount, - .kill_sb = kill_block_super, + .kill_sb = kill_f2fs_super, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("f2fs"); From 983a4535ce1a1169b95dff6303defbb81c6dff70 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Jan 2015 17:41:41 -0800 Subject: [PATCH 119/321] f2fs: do checkpoint when umount flag is not set If the previous checkpoint was done without CP_UMOUNT flag, it needs to do checkpoint with CP_UMOUNT for the next fast boot. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 ++- fs/f2fs/super.c | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e7e1f7ac..9c9a758e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1041,7 +1041,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); - if (!sbi->s_dirty && cpc->reason != CP_DISCARD) + if (!sbi->s_dirty && + cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2c9de6c2..c185a08f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -441,8 +441,13 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_stats(sbi); stop_gc_thread(sbi); - /* We don't need to do checkpoint when it's clean */ - if (sbi->s_dirty) { + /* + * We don't need to do checkpoint when superblock is clean. + * But, the previous checkpoint was not done by umount, it needs to do + * clean checkpoint again. + */ + if (sbi->s_dirty || + !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { struct cp_control cpc = { .reason = CP_UMOUNT, }; From 5a31b10af3a8dc19900f5a581200525905697c3c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 19 Jan 2015 20:24:37 +0800 Subject: [PATCH 120/321] f2fs: fix to release count of meta page in ->invalidatepage We will encounter deadloop in below scenario: 1. increase page count for F2FS_DIRTY_META type in following path: ->recover_fsync_data ->recover_data ->do_recover_data ->recover_data_page ->change_curseg ->write_sum_page ->set_page_dirty 2. fail in recover_data() 3. invalidate meta pages in truncate_inode_pages_final without decreasing page count. 4. deadloop when sync_meta_pages as page count will always be non-zero. message: NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [] pagevec_lookup_tag+0x27/0x30 [] sync_meta_pages+0x87/0x160 [f2fs] [] recover_fsync_data+0xeb9/0xf10 [f2fs] [] f2fs_fill_super+0x888/0x980 [f2fs] [] mount_bdev+0x16a/0x1a0 [] f2fs_mount+0x1f/0x30 [f2fs] [] mount_fs+0x36/0x170 [] vfs_kern_mount+0x55/0xe0 [] do_mount+0x1df/0x9f0 [] SyS_mount+0x70/0xb0 [] sysenter_do_call+0x12/0x12 To avoid page count leak, let's add ->invalidatepage and ->releasepage in f2fs_meta_aops as f2fs_node_aops to release meta page count correctly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/checkpoint.c Change-Id: I683626bb4d19f63d22d59907ae31df2a96b0d996 --- fs/f2fs/checkpoint.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9c9a758e..860f83ab 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -302,15 +302,33 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); + SetPagePrivate(page); return 1; } return 0; } +static void f2fs_invalidate_meta_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + + if (PageDirty(page)) + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_META); + ClearPagePrivate(page); +} + +static int f2fs_release_meta_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 1; +} + const struct address_space_operations f2fs_meta_aops = { .writepage = f2fs_write_meta_page, .writepages = f2fs_write_meta_pages, .set_page_dirty = f2fs_set_meta_page_dirty, + .invalidatepage = f2fs_invalidate_meta_page, + .releasepage = f2fs_release_meta_page, }; static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) From 042d91a2970616742a0d37042c3bda4751f96e2a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 22 Jan 2015 14:48:28 -0800 Subject: [PATCH 121/321] f2fs: leave comment for code readability During the recovery, any xattr blocks should not be found, since they are written into cold log, not the warm node chain. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2b48b6c1..bee5c7ee 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -346,6 +346,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (IS_INODE(page)) { recover_inline_xattr(inode, page); } else if (f2fs_has_xattr_block(ofs_of_node(page))) { + /* + * Deprecated; xattr blocks should be found from cold log. + * But, we should remain this for backward compatibility. + */ recover_xattr_data(inode, page, blkaddr); goto out; } From bdb4f2213c5b28b175518de1f7340b827a2baf9e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 Jan 2015 20:37:53 +0800 Subject: [PATCH 122/321] f2fs: use f2fs_radix_tree_insert to clean codes No modification in functionality, just clean codes with f2fs_radix_tree_insert. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 429ff218..71e3d2e7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -356,11 +356,8 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) } new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); new_ie->inode = inode; -retry: - if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { - cond_resched(); - goto retry; - } + + f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); list_add_tail(&new_ie->list, &gc_list->ilist); } From dd01e60f8a5e02abf7cca93015135c0451f0874a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Jan 2015 20:22:55 +0800 Subject: [PATCH 123/321] f2fs: make truncate_inline_date static 1. make truncate_inline_date static; 2. remove parameter @from of truncate_inline_date as callers only pass zero. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/inline.c | 25 +++++++++---------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8525cd88..a63275ef 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1647,7 +1647,6 @@ int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); int f2fs_convert_inline_inode(struct inode *); int f2fs_write_inline_data(struct inode *, struct page *); -void truncate_inline_data(struct page *, u64); bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, struct page **, unsigned int); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 020c7169..61214c5a 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -50,6 +50,12 @@ void read_inline_data(struct page *page, struct page *ipage) SetPageUptodate(page); } +static void truncate_inline_data(struct page *ipage) +{ + f2fs_wait_on_page_writeback(ipage, NODE); + memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); +} + int f2fs_read_inline_data(struct inode *inode, struct page *page) { struct page *ipage; @@ -125,7 +131,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_data(dn->inode_page, 0); + truncate_inline_data(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); f2fs_clear_inline_inode(dn->inode); @@ -198,19 +204,6 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) return 0; } -void truncate_inline_data(struct page *ipage, u64 from) -{ - void *addr; - - if (from >= MAX_INLINE_DATA) - return; - - f2fs_wait_on_page_writeback(ipage, NODE); - - addr = inline_data_addr(ipage); - memset(addr + from, 0, MAX_INLINE_DATA - from); -} - bool recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -252,7 +245,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage) if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - truncate_inline_data(ipage, 0); + truncate_inline_data(ipage); f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); @@ -371,7 +364,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_data(ipage, 0); + truncate_inline_data(ipage); stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); From 17fff6a25ed4fb03d93a28c38c0b48758867d9ab Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Jan 2015 20:24:21 +0800 Subject: [PATCH 124/321] f2fs: clean up {in,de}create_sleep_time Use pointer parameter @wait to pass result in {in,de}create_sleep_time for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/gc.c Change-Id: I652c0480aa4a5fecb0614a08f652f682525efa7c Conflicts: fs/f2fs/gc.c --- fs/f2fs/gc.c | 8 ++++---- fs/f2fs/gc.h | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 71e3d2e7..d2fd71fa 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -44,7 +44,7 @@ static int gc_thread_func(void *data) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); continue; } @@ -65,15 +65,15 @@ static int gc_thread_func(void *data) continue; if (!is_idle(sbi)) { - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); continue; } if (has_enough_invalid_blocks(sbi)) - wait_ms = decrease_sleep_time(gc_th, wait_ms); + decrease_sleep_time(gc_th, &wait_ms); else - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); stat_inc_bggc_count(sbi); diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 524543a6..d5ff97c5 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -66,26 +66,26 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; } -static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th, + long *wait) { - if (wait == gc_th->no_gc_sleep_time) - return wait; + if (*wait == gc_th->no_gc_sleep_time) + return; - wait += gc_th->min_sleep_time; - if (wait > gc_th->max_sleep_time) - wait = gc_th->max_sleep_time; - return wait; + *wait += gc_th->min_sleep_time; + if (*wait > gc_th->max_sleep_time) + *wait = gc_th->max_sleep_time; } -static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, + long *wait) { - if (wait == gc_th->no_gc_sleep_time) - wait = gc_th->max_sleep_time; + if (*wait == gc_th->no_gc_sleep_time) + *wait = gc_th->max_sleep_time; - wait -= gc_th->min_sleep_time; - if (wait <= gc_th->min_sleep_time) - wait = gc_th->min_sleep_time; - return wait; + *wait -= gc_th->min_sleep_time; + if (*wait <= gc_th->min_sleep_time) + *wait = gc_th->min_sleep_time; } static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) From 8c129f1e1f66f3b11fa3e8f6c1645abdf9895699 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 Jan 2015 17:48:42 +0800 Subject: [PATCH 125/321] f2fs: merge flags in struct f2fs_sb_info Currently, there are several variables with Boolean type as below: struct f2fs_sb_info { ... int s_dirty; bool need_fsck; bool s_closing; ... bool por_doing; ... } For this there are some issues: 1. there are some space of f2fs_sb_info is wasted due to aligning after Boolean type variables by compiler. 2. if we continuously add new flag into f2fs_sb_info, structure will be messed up. So in this patch, we try to: 1. switch s_dirty to Boolean type variable since it has two status 0/1. 2. merge s_dirty/need_fsck/s_closing/por_doing variables into s_flag. 3. introduce an enum type which can indicate different states of sbi. 4. use new introduced universal interfaces is_sbi_flag_set/{set,clear}_sbi_flag to operate flags for sbi. After that, above issues will be fixed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/super.c Change-Id: I91ece0f1143999a112919786a2f117b4ec1dbdf1 --- fs/f2fs/checkpoint.c | 12 ++++++------ fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 30 ++++++++++++++++++++---------- fs/f2fs/node.c | 4 ++-- fs/f2fs/recovery.c | 4 ++-- fs/f2fs/segment.h | 10 +++++----- fs/f2fs/super.c | 13 +++++++------ include/trace/events/f2fs.h | 4 ++-- 8 files changed, 45 insertions(+), 34 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 860f83ab..2ec406f8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -190,7 +190,7 @@ static int f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; @@ -483,7 +483,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return; - sbi->por_doing = true; + set_sbi_flag(sbi, SBI_POR_DOING); start_blk = __start_cp_addr(sbi) + 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); @@ -504,7 +504,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) } /* clear Orphan Flag */ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); return; } @@ -970,7 +970,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (sbi->need_fsck) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) set_ckpt_flags(ckpt, CP_FSCK_FLAG); /* update SIT/NAT bitmap */ @@ -1044,7 +1044,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return; clear_prefree_segments(sbi); - F2FS_RESET_SB_DIRT(sbi); + clear_sbi_flag(sbi, SBI_IS_DIRTY); } /* @@ -1059,7 +1059,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); - if (!sbi->s_dirty && + if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) goto out; if (unlikely(f2fs_cp_error(sbi))) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ef510e14..9181b2cf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -815,7 +815,7 @@ static int f2fs_write_data_page(struct page *page, zero_user_segment(page, offset, PAGE_CACHE_SIZE); write: - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (f2fs_is_drop_cache(inode)) goto out; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a63275ef..f9616ca6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -28,7 +28,7 @@ do { \ if (unlikely(condition)) { \ WARN_ON(1); \ - sbi->need_fsck = true; \ + set_sbi_flag(sbi, SBI_NEED_FSCK); \ } \ } while (0) #define f2fs_down_write(x, y) down_write(x) @@ -517,14 +517,20 @@ struct inode_management { unsigned long ino_num; /* number of entries */ }; +/* For s_flag in struct f2fs_sb_info */ +enum { + SBI_IS_DIRTY, /* dirty flag for checkpoint */ + SBI_IS_CLOSE, /* specify unmounting */ + SBI_NEED_FSCK, /* need fsck.f2fs to fix */ + SBI_POR_DOING, /* recovery is doing or not */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ - int s_dirty; /* dirty flag for checkpoint */ - bool need_fsck; /* need fsck.f2fs to fix */ - bool s_closing; /* specify unmounting */ + int s_flag; /* flags for sbi */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ @@ -544,7 +550,6 @@ struct f2fs_sb_info { struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ - bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ @@ -697,14 +702,19 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) return sbi->node_inode->i_mapping; } -static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) +{ + return sbi->s_flag & (0x01 << type); +} + +static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { - sbi->s_dirty = 1; + sbi->s_flag |= (0x01 << type); } -static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { - sbi->s_dirty = 0; + sbi->s_flag &= ~(0x01 << type); } static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) @@ -816,7 +826,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { atomic_inc(&sbi->nr_pages[count_type]); - F2FS_SET_SB_DIRT(sbi); + set_sbi_flag(sbi, SBI_IS_DIRTY); } static inline void inode_inc_dirty_pages(struct inode *inode) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1b3f9e9f..7bb62c71 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -587,7 +587,7 @@ static void truncate_node(struct dnode_of_data *dn) } invalidate: clear_node_page_dirty(dn->node_page); - F2FS_SET_SB_DIRT(sbi); + set_sbi_flag(sbi, SBI_IS_DIRTY); f2fs_put_page(dn->node_page, 1); @@ -1285,7 +1285,7 @@ static int f2fs_write_node_page(struct page *page, trace_f2fs_writepage(page, NODE); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index bee5c7ee..7c312dd7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -508,7 +508,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ - sbi->por_doing = true; + set_sbi_flag(sbi, SBI_POR_DOING); /* prevent checkpoint */ mutex_lock(&sbi->cp_mutex); @@ -541,7 +541,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) truncate_inode_pages(META_MAPPING(sbi), 0); } - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); if (err) { discard_next_dnode(sbi, blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7f327c0b..421d5794 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -460,7 +460,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + @@ -599,13 +599,13 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) { if (segno > TOTAL_SEGS(sbi) - 1) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) { if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } /* @@ -616,11 +616,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, { /* check segment usage */ if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); /* check boundary of a given segment number */ if (segno > TOTAL_SEGS(sbi) - 1) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c185a08f..7181dcc3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -446,7 +446,7 @@ static void f2fs_put_super(struct super_block *sb) * But, the previous checkpoint was not done by umount, it needs to do * clean checkpoint again. */ - if (sbi->s_dirty || + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { struct cp_control cpc = { .reason = CP_UMOUNT, @@ -486,7 +486,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (sync) { struct cp_control cpc; - cpc.reason = (sbi->s_closing) ? CP_UMOUNT : CP_SYNC; + cpc.reason = (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) ? + CP_UMOUNT : CP_SYNC; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); @@ -885,7 +886,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; - sbi->need_fsck = false; + clear_sbi_flag(sbi, SBI_NEED_FSCK); } /* @@ -996,7 +997,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); init_rwsem(&sbi->read_io.io_rwsem); @@ -1120,7 +1121,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_proc; if (!retry) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -1189,7 +1190,7 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, static void kill_f2fs_super(struct super_block *sb) { if (sb->s_root) - F2FS_SB(sb)->s_closing = true; + set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); kill_block_super(sb); } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index a3e6dd4b..46f70fc5 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -184,13 +184,13 @@ TRACE_EVENT(f2fs_sync_fs, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, dirty) + __field(bool, dirty) __field(int, wait) ), TP_fast_assign( __entry->dev = sb->s_dev; - __entry->dirty = F2FS_SB(sb)->s_dirty; + __entry->dirty = is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY); __entry->wait = wait; ), From e04ad921c4c02f7dbbf31a3153fd772089a0c6b4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Jan 2015 17:41:39 -0800 Subject: [PATCH 126/321] f2fs: fix not to drop mount options when retrying fill_super If wrong mount option was requested, f2fs tries to fill_super again. But, during the next trial, f2fs has no valid mount options, since parse_options deleted all the separators in the original string. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7181dcc3..892af642 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -941,6 +941,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct inode *root; long err = -EINVAL; bool retry = true; + char *options = NULL; int i; try_onemore: @@ -972,9 +973,15 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi, POSIX_ACL); #endif /* parse mount options */ - err = parse_options(sb, (char *)data); - if (err) + options = kstrdup((const char *)data, GFP_KERNEL); + if (data && !options) { + err = -ENOMEM; goto free_sb_buf; + } + + err = parse_options(sb, options); + if (err) + goto free_options; sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); sb->s_max_links = F2FS_LINK_MAX; @@ -1018,7 +1025,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(sbi->meta_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_sb_buf; + goto free_options; } err = get_valid_checkpoint(sbi); @@ -1143,6 +1150,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_kobj; } + kfree(options); return 0; free_kobj: @@ -1167,6 +1175,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); +free_options: + kfree(options); free_sb_buf: brelse(raw_super_buf); free_sbi: From e529d8e939d9450a2e21bb03d575d81c4808c8f0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Jan 2015 18:43:45 -0800 Subject: [PATCH 127/321] f2fs: avoid write_checkpoint if f2fs is mounted readonly Do not change any partition when f2fs is changed to readonly mode. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2ec406f8..9d216974 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1064,6 +1064,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; + if (f2fs_readonly(sbi->sb)) + goto out; if (block_operations(sbi)) goto out; From 9d33d89c2d708b8e145973b0dcecf543147080b5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Jan 2015 11:45:33 -0800 Subject: [PATCH 128/321] f2fs: split UMOUNT and FASTBOOT flags This patch adds FASTBOOT flag into checkpoint as follows. - CP_UMOUNT_FLAG is set when system is umounted. - CP_FASTBOOT_FLAG is set when intermediate checkpoint having node summaries was done. So, if you get CP_UMOUNT_FLAG from checkpoint, the system was umounted cleanly. Instead, if there was sudden-power-off, you can get CP_FASTBOOT_FLAG or nothing. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/gc.c fs/f2fs/super.c Change-Id: Iad844583f013f63a94f8c78dfa144611fe9daa12 --- fs/f2fs/checkpoint.c | 14 ++++++++------ fs/f2fs/f2fs.h | 19 +++++++++++++++++++ fs/f2fs/gc.c | 1 + fs/f2fs/segment.c | 11 +++++------ fs/f2fs/super.c | 3 +-- 5 files changed, 34 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9d216974..dbc9e24a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -953,17 +953,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); - if (cpc->reason == CP_UMOUNT) { - set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + if (__remain_node_summaries(cpc->reason)) ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); - } else { - clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks); - } + + if (cpc->reason == CP_UMOUNT) + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else + clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -1007,7 +1009,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; - if (cpc->reason == CP_UMOUNT) { + if (__remain_node_summaries(cpc->reason)) { write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f9616ca6..603860f1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -762,6 +762,25 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) up_write(&sbi->cp_rwsem); } +static inline int __get_cp_reason(struct f2fs_sb_info *sbi) +{ + int reason = CP_SYNC; + + if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) + reason = CP_UMOUNT; + return reason; +} + +static inline bool __remain_node_summaries(int reason) +{ + return (reason == CP_UMOUNT); +} + +static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) +{ + return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)); +} + /* * Check whether the given nid is within node id range. */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d2fd71fa..788619f2 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -700,6 +700,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) .iroot = RADIX_TREE_INIT(GFP_NOFS), }; + cpc.reason = __get_cp_reason(sbi); gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9b0cd93b..0204aedf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1458,7 +1458,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) segno = le32_to_cpu(ckpt->cur_data_segno[type]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - CURSEG_HOT_DATA]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); else blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); @@ -1467,7 +1467,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) CURSEG_HOT_NODE]); blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - CURSEG_HOT_NODE]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, type - CURSEG_HOT_NODE); else @@ -1478,7 +1478,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { + if (__exist_node_summaries(sbi)) { struct f2fs_summary *ns = &sum->entries[0]; int i; for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { @@ -1527,7 +1527,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) type = CURSEG_HOT_NODE; } - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), NR_CURSEG_TYPE - type, META_CP); @@ -1624,8 +1624,7 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) - write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); + write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 892af642..28cbde15 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -486,8 +486,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (sync) { struct cp_control cpc; - cpc.reason = (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) ? - CP_UMOUNT : CP_SYNC; + cpc.reason = __get_cp_reason(sbi); mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); From 6a38c6f71d00373550dbbbac4597e374d5adf738 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Jan 2015 19:16:59 -0800 Subject: [PATCH 129/321] f2fs: should fail mount when trying to recover data on read-only dev If device is read-only, we should not proceed data recovery. But, if the previous checkpoint was done by normal clean shutdown, it's safe to proceed the recovery, since there will be no data to be recovered. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 28cbde15..f3efdd64 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1131,6 +1131,15 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + /* + * mount should be failed, when device has readonly mode, and + * previous checkpoint was not done by clean system shutdown. + */ + if (bdev_read_only(sb->s_bdev) && + !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) { + err = -EROFS; + goto free_kobj; + } err = recover_fsync_data(sbi); if (err) { f2fs_msg(sb, KERN_ERR, From b1a738af71b8e856201b99bc7197c0362ea2eff0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 30 Jan 2015 11:39:08 -0800 Subject: [PATCH 130/321] f2fs: keep PagePrivate during releasepage If PagePrivate is removed by releasepage, f2fs loses counting dirty pages. e.g., try_to_release_page will not release page when the page is dirty, but our releasepage removes PagePrivate. [] try_to_release_page+0x35/0x50 [] invalidate_inode_pages2_range+0x2f9/0x3b0 [] ? truncate_blocks+0x384/0x4d0 [f2fs] [] ? f2fs_direct_IO+0x283/0x290 [f2fs] [] ? get_data_block_fiemap+0x20/0x20 [f2fs] [] generic_file_direct_write+0x163/0x170 [] __generic_file_write_iter+0x2a6/0x350 [] generic_file_write_iter+0x3f/0xb0 [] new_sync_write+0x81/0xb0 [] vfs_write+0xb7/0x1f0 [] SyS_write+0x49/0xb0 [] system_call_fastpath+0x16/0x1b Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++++ fs/f2fs/data.c | 4 ++++ fs/f2fs/node.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dbc9e24a..7ac65066 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -319,6 +319,10 @@ static void f2fs_invalidate_meta_page(struct page *page, unsigned long offset) static int f2fs_release_meta_page(struct page *page, gfp_t wait) { + /* If this is dirty page, keep PagePrivate */ + if (PageDirty(page)) + return 0; + ClearPagePrivate(page); return 1; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9181b2cf..8882dd43 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1129,6 +1129,10 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) static int f2fs_release_data_page(struct page *page, gfp_t wait) { + /* If this is dirty page, keep PagePrivate */ + if (PageDirty(page)) + return 0; + ClearPagePrivate(page); return 1; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7bb62c71..56a76d1d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1380,6 +1380,10 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) static int f2fs_release_node_page(struct page *page, gfp_t wait) { + /* If this is dirty page, keep PagePrivate */ + if (PageDirty(page)) + return 0; + ClearPagePrivate(page); return 1; } From 702d49559a6aeccd0a72bbe6eb5031f9996d4a62 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 30 Jan 2015 16:43:11 -0800 Subject: [PATCH 131/321] f2fs: show the number of writeback pages in stat This patch adds the # of writeback pages in stat info. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 5 +++-- fs/f2fs/f2fs.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0f721f6a..ac2bd8e7 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -40,6 +40,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); + si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -266,8 +267,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - inmem: %4d\n", - si->inmem_pages); + seq_printf(s, " - inmem: %4d, wb: %4d\n", + si->inmem_pages, si->wb_pages); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 603860f1..a2dc0d12 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1543,7 +1543,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; - int bg_gc, inline_inode, inline_dir, inmem_pages; + int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; From 0cb28be6fe4796ac11859f41892aa306e1bc7586 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:44:29 +0800 Subject: [PATCH 132/321] f2fs: merge {invalidate,release}page for meta/node/data pages This patch merges ->{invalidate,release}page function for meta/node/data pages. After this, duplication of codes could be removed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/data.c fs/f2fs/node.c Change-Id: I87445ec7c19cc709702f770883a62da8e7674e6c --- fs/f2fs/checkpoint.c | 23 ++--------------------- fs/f2fs/data.c | 21 ++++++++++++++------- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/node.c | 22 ++-------------------- 4 files changed, 20 insertions(+), 48 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7ac65066..8094d2f7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -308,31 +308,12 @@ static int f2fs_set_meta_page_dirty(struct page *page) return 0; } -static void f2fs_invalidate_meta_page(struct page *page, unsigned long offset) -{ - struct inode *inode = page->mapping->host; - - if (PageDirty(page)) - dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_META); - ClearPagePrivate(page); -} - -static int f2fs_release_meta_page(struct page *page, gfp_t wait) -{ - /* If this is dirty page, keep PagePrivate */ - if (PageDirty(page)) - return 0; - - ClearPagePrivate(page); - return 1; -} - const struct address_space_operations f2fs_meta_aops = { .writepage = f2fs_write_meta_page, .writepages = f2fs_write_meta_pages, .set_page_dirty = f2fs_set_meta_page_dirty, - .invalidatepage = f2fs_invalidate_meta_page, - .releasepage = f2fs_release_meta_page, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, }; static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8882dd43..375dbff3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1115,19 +1115,26 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } -static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) +void f2fs_invalidate_page(struct page *page, unsigned long offset) { struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (offset % PAGE_CACHE_SIZE) + if (inode->i_ino >= F2FS_ROOT_INO(sbi) && (offset % PAGE_CACHE_SIZE)) return; - if (PageDirty(page)) - inode_dec_dirty_pages(inode); + if (PageDirty(page)) { + if (inode->i_ino == F2FS_META_INO(sbi)) + dec_page_count(sbi, F2FS_DIRTY_META); + else if (inode->i_ino == F2FS_NODE_INO(sbi)) + dec_page_count(sbi, F2FS_DIRTY_NODES); + else + inode_dec_dirty_pages(inode); + } ClearPagePrivate(page); } -static int f2fs_release_data_page(struct page *page, gfp_t wait) +int f2fs_release_page(struct page *page, gfp_t wait) { /* If this is dirty page, keep PagePrivate */ if (PageDirty(page)) @@ -1182,8 +1189,8 @@ const struct address_space_operations f2fs_dblock_aops = { .write_begin = f2fs_write_begin, .write_end = f2fs_write_end, .set_page_dirty = f2fs_set_data_page_dirty, - .invalidatepage = f2fs_invalidate_data_page, - .releasepage = f2fs_release_data_page, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, .direct_IO = f2fs_direct_IO, .bmap = f2fs_bmap, }; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a2dc0d12..f4e7d427 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1514,6 +1514,8 @@ struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct page *, struct f2fs_io_info *); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); +void f2fs_invalidate_page(struct page *, unsigned long); +int f2fs_release_page(struct page *, gfp_t); /* * gc.c diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 56a76d1d..7c59c302 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1370,24 +1370,6 @@ static int f2fs_set_node_page_dirty(struct page *page) return 0; } -static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) -{ - struct inode *inode = page->mapping->host; - if (PageDirty(page)) - dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES); - ClearPagePrivate(page); -} - -static int f2fs_release_node_page(struct page *page, gfp_t wait) -{ - /* If this is dirty page, keep PagePrivate */ - if (PageDirty(page)) - return 0; - - ClearPagePrivate(page); - return 1; -} - /* * Structure of the f2fs node operations */ @@ -1395,8 +1377,8 @@ const struct address_space_operations f2fs_node_aops = { .writepage = f2fs_write_node_page, .writepages = f2fs_write_node_pages, .set_page_dirty = f2fs_set_node_page_dirty, - .invalidatepage = f2fs_invalidate_node_page, - .releasepage = f2fs_release_node_page, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, }; static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, From dca858987f83276f9d512f078cb4764aae0d19e2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 26 Jan 2015 17:41:23 -0800 Subject: [PATCH 133/321] f2fs: introduce a batched trim This patch introduces a batched trimming feature, which submits split discard commands. This is to avoid long latency due to huge trim commands. If fstrim was triggered ranging from 0 to the end of device, we should lock all the checkpoint-related mutexes, resulting in very long latency. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: Documentation/ABI/testing/sysfs-fs-f2fs Change-Id: I3648efaf8a833a88459fb7d8813b2a18d61bd949 --- Documentation/ABI/testing/sysfs-fs-f2fs | 82 +++++++++++++++++++++++++ Documentation/filesystems/f2fs.txt | 4 ++ fs/f2fs/f2fs.h | 7 +++ fs/f2fs/segment.c | 17 +++-- fs/f2fs/super.c | 2 + 5 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-fs-f2fs diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs new file mode 100644 index 00000000..2c4cc420 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -0,0 +1,82 @@ +What: /sys/fs/f2fs//gc_max_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the maximun sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_min_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the minimum sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_no_gc_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the default sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_idle +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the victim selection policy for garbage collection. + +What: /sys/fs/f2fs//reclaim_segments +Date: October 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the issue rate of segment discard commands. + +What: /sys/fs/f2fs//ipu_policy +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the in-place-update policy. + +What: /sys/fs/f2fs//min_ipu_util +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the FS utilization condition for the in-place-update + policies. + +What: /sys/fs/f2fs//min_fsync_blocks +Date: September 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the dirty page count condition for the in-place-update + policies. + +What: /sys/fs/f2fs//max_small_discards +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the issue rate of small discard commands. + +What: /sys/fs/f2fs//max_victim_search +Date: January 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the number of trials to find a victim segment. + +What: /sys/fs/f2fs//dir_level +Date: March 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the directory level for large directory. + +What: /sys/fs/f2fs//ram_thresh +Date: March 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the memory footprint used by f2fs. + +What: /sys/fs/f2fs//trim_sections +Date: February 2015 +Contact: "Jaegeuk Kim" +Description: + Controls the trimming rate in batch mode. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 7067a539..199649a2 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -194,6 +194,10 @@ Files in /sys/fs/f2fs/ checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. + trim_sections This parameter controls the number of sections + to be trimmed out in batch mode when FITRIM + conducts. 32 sections is set by default. + ipu_policy This parameter controls the policy of in-place updates in f2fs. There are five policies: 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f4e7d427..d92d82df 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -103,6 +103,10 @@ enum { CP_DISCARD, }; +#define DEF_BATCHED_TRIM_SECTIONS 32 +#define BATCHED_TRIM_SEGMENTS(sbi) \ + (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) + struct cp_control { int reason; __u64 trim_start; @@ -445,6 +449,9 @@ struct f2fs_sm_info { int nr_discards; /* # of discards in the list */ int max_discards; /* max. discards to be issued */ + /* for batched trimming */ + unsigned int trim_sections; /* # of sections to trim */ + struct list_head sit_entry_set; /* sit entry set list */ unsigned int ipu_policy; /* in-place-update policy */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0204aedf..c4a81e4f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1123,14 +1123,19 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); cpc.reason = CP_DISCARD; - cpc.trim_start = start_segno; - cpc.trim_end = end_segno; cpc.trim_minlen = range->minlen >> sbi->log_blocksize; /* do checkpoint to issue discard commands safely */ - mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { + cpc.trim_start = start_segno; + cpc.trim_end = min_t(unsigned int, rounddown(start_segno + + BATCHED_TRIM_SEGMENTS(sbi), + sbi->segs_per_sec) - 1, end_segno); + + mutex_lock(&sbi->gc_mutex); + write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); + } out: range->len = cpc.trimmed << sbi->log_blocksize; return 0; @@ -2184,6 +2189,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->nr_discards = 0; sm_info->max_discards = 0; + sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; + INIT_LIST_HEAD(&sm_info->sit_entry_set); if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f3efdd64..96a7d0c0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -190,6 +190,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); @@ -205,6 +206,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle), ATTR_LIST(reclaim_segments), ATTR_LIST(max_small_discards), + ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), From adcabd7d3a2626550ebdd631842e8242207d89d2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Feb 2015 17:36:15 +0800 Subject: [PATCH 134/321] f2fs: fix to use highmem for pages of newly created directory In commit a78186ebe516 ("f2fs: use highmem for directory pages"), we have set __GFP_HIGHMEM into dir mapping's gfp flag in f2fs_iget, so high address memory could be used for these existing dir's page. But we forgot to set flag for newly created dir, due to this reason, our newly created dir pages could not be allocated from high address memory. Fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 71d0e696..e6582a77 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -288,7 +288,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); From 114636ea688ce41f24182e2f3317a1018c7861ea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Feb 2015 11:23:58 +0800 Subject: [PATCH 135/321] f2fs: avoid data offset overflow when lseeking huge file xfstest generic/285 complains our issue in lseeking huge file. Here is the detail output of generic/285: "./check -f2fs tests/generic/285 Ran: generic/285 Failures: generic/285 Failed 1 of 1 tests 10. Test a huge file for offset overflow 10.01 SEEK_HOLE expected 65536 or 8589934592, got 65536. succ 10.02 SEEK_HOLE expected 65536 or 8589934592, got 65536. succ 10.03 SEEK_DATA expected 0 or 0, got 0. succ 10.04 SEEK_DATA expected 1 or 1, got 1. succ 10.05 SEEK_HOLE expected 8589934592 or 8589934592, got 0. FAIL 10.06 SEEK_DATA expected 8589869056 or 8589869056, got 8589869056. succ 10.07 SEEK_DATA expected 8589869057 or 8589869057, got 8589869057. succ 10.08 SEEK_DATA expected 8589869056 or 8589869056, got 4294901760. FAIL" The reason of this issue is: We will calculate current offset through left shifting page-offset with PAGE_CACHE_SHIFT bits, but our page-offset is a type of unsigned long, its size is 4 bytes in 32-bits machine. So if our page-offset is bigger than (1 << 32 / pagesize - 1), result of left shifting will overflow. Let's fix this issue by casting type of page-offset to type of current offset: loff_t. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 87e76864..1d7d723a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -368,7 +368,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; dn.ofs_in_node++, pgofs++, - data_ofs = pgofs << PAGE_CACHE_SHIFT) { + data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) { block_t blkaddr; blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); From 51ad388c9b08b3e1b8460388f1f4f8a847f282d8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Feb 2015 18:53:45 -0800 Subject: [PATCH 136/321] f2fs: check node page contents all the time In get_node_page, if the page is up-to-date, we assumed that the page was not reclaimed at all. But, sometimes it was reported that its contents was missing. So, just for sure, let's check its mapping and contents. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/node.c Change-Id: Ib4a720da145c0ba2ae4eedb61cf65f21ddce701d --- fs/f2fs/node.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7c59c302..58473c5d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1035,11 +1035,11 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) err = read_node_page(page, READ_SYNC); if (err < 0) return ERR_PTR(err); - else if (err == LOCKED_PAGE) - goto got_it; + else if (err != LOCKED_PAGE) + lock_page(page); - lock_page(page); if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { + ClearPageUptodate(page); f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -1047,7 +1047,6 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) f2fs_put_page(page, 1); goto repeat; } -got_it: mark_page_accessed(page); return page; } From 3732a4125e1788f5af4fab8f3dd2cec15f769551 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Feb 2015 10:34:38 -0800 Subject: [PATCH 137/321] f2fs: call set_buffer_new for get_block This patch fixes wrong handling of buffer_new flag in get_block. If f2fs allocates new blocks and mapped buffer_head, it needs to set buffer_new for the bh_result. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 375dbff3..9b02a1a7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -271,7 +271,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, unsigned int blkbits = inode->i_sb->s_blocksize_bits; size_t count; - clear_buffer_new(bh_result); + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, start_blkaddr + pgofs - start_fofs); count = end_fofs - pgofs + 1; @@ -631,12 +631,14 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto put_out; if (dn.data_blkaddr != NULL_ADDR) { + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, dn.data_blkaddr); } else if (create) { err = __allocate_data_block(&dn); if (err) goto put_out; allocated = true; + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, dn.data_blkaddr); } else { goto put_out; From e7a99c6eea09763d3d37e623cb01505556896102 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Feb 2015 12:02:44 -0800 Subject: [PATCH 138/321] f2fs: introduce macros to convert bytes and blocks in f2fs This patch adds two macros for transition between byte and block offsets. Currently, f2fs only supports 4KB blocks, so use the default size for now. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++---- fs/f2fs/file.c | 3 +-- fs/f2fs/segment.c | 8 ++++---- include/linux/f2fs_fs.h | 4 ++++ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8094d2f7..612c3413 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -974,15 +974,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* write out checkpoint buffer at block 0 */ cp_page = grab_meta_page(sbi, start_blk++); kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + memcpy(kaddr, ckpt, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); for (i = 1; i < 1 + cp_payload_blks; i++) { cp_page = grab_meta_page(sbi, start_blk++); kaddr = page_address(cp_page); - memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, - (1 << sbi->log_blocksize)); + memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); } @@ -1002,7 +1001,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* writeout checkpoint block */ cp_page = grab_meta_page(sbi, start_blk); kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + memcpy(kaddr, ckpt, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1d7d723a..1baefb4f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -502,8 +502,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); + free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); if (lock) f2fs_lock_op(sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c4a81e4f..cae78a6f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1105,8 +1105,8 @@ static const struct segment_allocation default_salloc_ops = { int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { - __u64 start = range->start >> sbi->log_blocksize; - __u64 end = start + (range->len >> sbi->log_blocksize) - 1; + __u64 start = F2FS_BYTES_TO_BLK(range->start); + __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; unsigned int start_segno, end_segno; struct cp_control cpc; @@ -1123,7 +1123,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); cpc.reason = CP_DISCARD; - cpc.trim_minlen = range->minlen >> sbi->log_blocksize; + cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen); /* do checkpoint to issue discard commands safely */ for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { @@ -1137,7 +1137,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) mutex_unlock(&sbi->gc_mutex); } out: - range->len = cpc.trimmed << sbi->log_blocksize; + range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return 0; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 0db7b366..0b9ef60b 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,12 +19,16 @@ #define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ +#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) +#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) + /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 From 43a8d98d3e7e10a36fc9a45d389bb691077a2da2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Feb 2015 12:09:53 -0800 Subject: [PATCH 139/321] f2fs: allocate data blocks in advance for f2fs_direct_IO This patch adds preallocation for data blocks to prepare f2fs_direct_IO. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/data.c Change-Id: I98ef07050cdc9b1a552858c314a58b80b082cdba --- fs/f2fs/data.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9b02a1a7..e88065e4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -589,6 +589,56 @@ static int __allocate_data_block(struct dnode_of_data *dn) return 0; } +static void __allocate_data_blocks(struct inode *inode, loff_t offset, + size_t count) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + u64 start = F2FS_BYTES_TO_BLK(offset); + u64 len = F2FS_BYTES_TO_BLK(count); + bool allocated; + u64 end_offset; + + while (len) { + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); + + /* When reading holes, we need its node page */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&dn, start, ALLOC_NODE)) + goto out; + + allocated = false; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + + while (dn.ofs_in_node < end_offset && len) { + if (dn.data_blkaddr == NULL_ADDR) { + if (__allocate_data_block(&dn)) + goto sync_out; + allocated = true; + } + len--; + start++; + dn.ofs_in_node++; + } + + if (allocated) + sync_inode_page(&dn); + + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + } + return; + +sync_out: + if (allocated) + sync_inode_page(&dn); + f2fs_put_dnode(&dn); +out: + f2fs_unlock_op(sbi); + return; +} + /* * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. * If original data blocks are allocated, then give them to blockdev. @@ -614,10 +664,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (check_extent_cache(inode, pgofs, bh_result)) goto out; - if (create) { - f2fs_balance_fs(F2FS_I_SB(inode)); + if (create) f2fs_lock_op(F2FS_I_SB(inode)); - } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -1107,6 +1155,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, trace_f2fs_direct_IO_enter(inode, offset, count, rw); + if (rw & WRITE) + __allocate_data_blocks(inode, offset, count); + err = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, get_data_block); if (err < 0 && (rw & WRITE)) From 4a58da3158b6ef5510652175134fcf7ffaaca0ff Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 10 Feb 2015 16:23:12 -0800 Subject: [PATCH 140/321] f2fs: fix sparse warnings This patch resolves the following warnings. include/trace/events/f2fs.h:150:1: warning: expression using sizeof bool include/trace/events/f2fs.h:180:1: warning: expression using sizeof bool include/trace/events/f2fs.h:990:1: warning: expression using sizeof bool include/trace/events/f2fs.h:990:1: warning: expression using sizeof bool include/trace/events/f2fs.h:150:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:180:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:990:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:990:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) fs/f2fs/checkpoint.c:27:19: warning: symbol 'inode_entry_slab' was not declared. Should it be static? fs/f2fs/checkpoint.c:577:15: warning: cast to restricted __le32 fs/f2fs/checkpoint.c:592:15: warning: cast to restricted __le32 fs/f2fs/trace.c:19:1: warning: symbol 'pids' was not declared. Should it be static? fs/f2fs/trace.c:21:21: warning: symbol 'last_io' was not declared. Should it be static? Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/trace.c Change-Id: I7a8eb950522ca7ba507f6efe0a357c25a18cd616 --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/f2fs.h | 1 + fs/f2fs/gc.h | 2 -- include/trace/events/f2fs.h | 13 +++++++------ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 612c3413..02f7e5e3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -573,7 +573,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp1; - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; @@ -588,7 +588,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp2; - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d92d82df..da111850 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1675,6 +1675,7 @@ extern const struct address_space_operations f2fs_meta_aops; extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; +extern struct kmem_cache *inode_entry_slab; /* * inline.c diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index d5ff97c5..b4a65be9 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -40,8 +40,6 @@ struct gc_inode_list { struct radix_tree_root iroot; }; -extern struct kmem_cache *inode_entry_slab; - /* * inline functions */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 46f70fc5..d38db971 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -148,14 +148,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, TRACE_EVENT(f2fs_sync_file_exit, - TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), + TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret), TP_ARGS(inode, need_cp, datasync, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(bool, need_cp) + __field(int, need_cp) __field(int, datasync) __field(int, ret) ), @@ -184,7 +184,7 @@ TRACE_EVENT(f2fs_sync_fs, TP_STRUCT__entry( __field(dev_t, dev) - __field(bool, dirty) + __field(int, dirty) __field(int, wait) ), @@ -985,14 +985,15 @@ TRACE_EVENT(f2fs_issue_discard, TRACE_EVENT(f2fs_issue_flush, - TP_PROTO(struct super_block *sb, bool nobarrier, bool flush_merge), + TP_PROTO(struct super_block *sb, unsigned int nobarrier, + unsigned int flush_merge), TP_ARGS(sb, nobarrier, flush_merge), TP_STRUCT__entry( __field(dev_t, dev) - __field(bool, nobarrier) - __field(bool, flush_merge) + __field(unsigned int, nobarrier) + __field(unsigned int, flush_merge) ), TP_fast_assign( From eb810c453cd9d111a1a889b11f8e087317678974 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 10 Feb 2015 16:44:29 -0800 Subject: [PATCH 141/321] f2fs: avoid variable length array Instead of using variable length array, this patch let preallocate memory for them. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 1 + fs/f2fs/segment.c | 10 ++++++++-- fs/f2fs/segment.h | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ac2bd8e7..e671373c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -142,6 +142,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cae78a6f..7dcd99af 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -562,7 +562,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; - unsigned long dmap[entries]; + unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); int i; @@ -981,7 +981,7 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi, { struct seg_entry *se = get_seg_entry(sbi, seg->segno); int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); - unsigned long target_map[entries]; + unsigned long *target_map = SIT_I(sbi)->tmp_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long *cur_map = (unsigned long *)se->cur_valid_map; int i, pos; @@ -1912,6 +1912,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; } + sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->tmp_map) + return -ENOMEM; + if (sbi->segs_per_sec > 1) { sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry)); @@ -2293,6 +2297,8 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) kfree(sit_i->sentries[start].ckpt_valid_map); } } + kfree(sit_i->tmp_map); + vfree(sit_i->sentries); vfree(sit_i->sec_entries); kfree(sit_i->dirty_sentries_bitmap); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 421d5794..ba858ca0 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -189,6 +189,7 @@ struct sit_info { char *sit_bitmap; /* SIT bitmap pointer */ unsigned int bitmap_size; /* SIT bitmap size */ + unsigned long *tmp_map; /* bitmap for temporal use */ unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ unsigned int dirty_sentries; /* # of dirty sentries */ unsigned int sents_per_block; /* # of SIT entries per block */ From c3ac02b05926e1c0c6d2c8bf532b5033e440d0f5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Feb 2015 11:25:11 -0800 Subject: [PATCH 142/321] f2fs: fix accessing wrong indexed data blocks This patch fixes the following test. This causes: attempt to access beyond end of device sdb2: rw=16384, want=14413962000, limit=16777216 The reason is: - f2fs_write_begin - f2fs_convert_inline_inode returns -ENOSPC - f2fs_write_failed - truncate_blocks - truncate_partial_data_page - find_data_page - get_dnode_of_data returns wrong data index retrieved from inline_data - f2fs_submit_page_bio(wrong data index) - submit_bio(wrong data index) Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 58473c5d..ede8f815 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -473,7 +473,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct page *npage[4]; - struct page *parent; + struct page *parent = NULL; int offset[4]; unsigned int noffset[4]; nid_t nids[4]; @@ -490,6 +490,14 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (IS_ERR(npage[0])) return PTR_ERR(npage[0]); } + + /* if inline_data is set, should not report any block indices */ + if (f2fs_has_inline_data(dn->inode) && index) { + err = -EINVAL; + f2fs_put_page(npage[0], 1); + goto release_out; + } + parent = npage[0]; if (level != 0) nids[1] = get_nid(parent, offset[0], true); From 73b333e9079d6093dfea4450c904fdfcefac050b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 11 Feb 2015 18:20:38 +0800 Subject: [PATCH 143/321] f2fs: use spinlock for segmap_lock instead of rwlock rwlock can provide better concurrency when there are much more readers than writers because readers can hold the rwlock simultaneously. But now, for segmap_lock rwlock in struct free_segmap_info, there is only one reader 'mount' from below call path: ->f2fs_fill_super ->build_segment_manager ->build_dirty_segmap ->init_dirty_segmap ->find_next_inuse read_lock ... read_unlock Now that our concurrency can not be improved since there is no other reader for this lock, we do not need to use rwlock_t type for segmap_lock, let's replace it with spinlock_t type. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- fs/f2fs/segment.h | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7dcd99af..d9c5e188 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -857,7 +857,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, int go_left = 0; int i; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, @@ -930,7 +930,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); __set_inuse(sbi, segno); *newseg = segno; - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) @@ -1980,7 +1980,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); free_i->free_segments = 0; free_i->free_sections = 0; - rwlock_init(&free_i->segmap_lock); + spin_lock_init(&free_i->segmap_lock); return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ba858ca0..7fd35111 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -208,7 +208,7 @@ struct free_segmap_info { unsigned int start_segno; /* start segment number logically */ unsigned int free_segments; /* # of free segments */ unsigned int free_sections; /* # of free sections */ - rwlock_t segmap_lock; /* free segmap lock */ + spinlock_t segmap_lock; /* free segmap lock */ unsigned long *free_segmap; /* free segment bitmap */ unsigned long *free_secmap; /* free section bitmap */ }; @@ -319,9 +319,9 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, unsigned int max, unsigned int segno) { unsigned int ret; - read_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); ret = find_next_bit(free_i->free_segmap, max, segno); - read_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); return ret; } @@ -332,7 +332,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int start_segno = secno * sbi->segs_per_sec; unsigned int next; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); free_i->free_segments++; @@ -341,7 +341,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void __set_inuse(struct f2fs_sb_info *sbi, @@ -363,7 +363,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int start_segno = secno * sbi->segs_per_sec; unsigned int next; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (test_and_clear_bit(segno, free_i->free_segmap)) { free_i->free_segments++; @@ -374,7 +374,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_sections++; } } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, @@ -382,13 +382,13 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int secno = segno / sbi->segs_per_sec; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (!test_and_set_bit(segno, free_i->free_segmap)) { free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) free_i->free_sections--; } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, From bbc9a2b565bb7cbe86cc7939d3721edc5f78bb4d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Feb 2015 16:15:44 +0800 Subject: [PATCH 144/321] f2fs: remove unused inline_dentry_addr inline_dentry_addr is introduced with inline dentry feature without being used, now we do not need to keep it for any reason, so remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index da111850..79e09b51 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1284,12 +1284,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); } -static inline void *inline_dentry_addr(struct page *page) -{ - struct f2fs_inode *ri = F2FS_INODE(page); - return (void *)&(ri->i_addr[1]); -} - static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) { if (!f2fs_has_inline_dentry(dir)) From e32a593f1ab0b2f0d46870eed6bdabe2230a6f23 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Feb 2015 16:17:20 +0800 Subject: [PATCH 145/321] f2fs: introduce f2fs_update_dentry to clean up duplicated codes This patch introduces f2fs_update_dentry to remove redundant code in f2fs_add_inline_entry and __f2fs_add_link. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 33 +++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/inline.c | 18 ++++++------------ 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b317f533..ffaf9343 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -489,6 +489,24 @@ int room_for_filename(const void *bitmap, int slots, int max_slots) goto next; } +void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, + const struct qstr *name, f2fs_hash_t name_hash, + unsigned int bit_pos) +{ + struct f2fs_dir_entry *de; + int slots = GET_DENTRY_SLOTS(name->len); + int i; + + de = &d->dentry[bit_pos]; + de->hash_code = name_hash; + de->name_len = cpu_to_le16(name->len); + memcpy(d->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); +} + /* * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). @@ -501,15 +519,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, unsigned int current_depth; unsigned long bidx, block; f2fs_hash_t dentry_hash; - struct f2fs_dir_entry *de; unsigned int nbucket, nblock; size_t namelen = name->len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; + struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(namelen); struct page *page; int err = 0; - int i; if (f2fs_has_inline_dentry(dir)) { err = f2fs_add_inline_entry(dir, name, inode); @@ -568,14 +585,10 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, err = PTR_ERR(page); goto fail; } - de = &dentry_blk->dentry[bit_pos]; - de->hash_code = dentry_hash; - de->name_len = cpu_to_le16(namelen); - memcpy(dentry_blk->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); - for (i = 0; i < slots; i++) - test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + f2fs_update_dentry(inode, &d, name, dentry_hash, bit_pos); + set_page_dirty(dentry_page); /* we don't need to mark_inode_dirty now */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 79e09b51..e7f2b187 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1370,6 +1370,8 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); +void f2fs_update_dentry(struct inode *, struct f2fs_dentry_ptr *, + const struct qstr *, f2fs_hash_t , unsigned int); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 61214c5a..ad23fd44 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -387,15 +387,12 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, struct page *ipage; unsigned int bit_pos; f2fs_hash_t name_hash; - struct f2fs_dir_entry *de; size_t namelen = name->len; struct f2fs_inline_dentry *dentry_blk = NULL; + struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(namelen); struct page *page; int err = 0; - int i; - - name_hash = f2fs_dentry_hash(name); ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) @@ -419,14 +416,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, } f2fs_wait_on_page_writeback(ipage, NODE); - de = &dentry_blk->dentry[bit_pos]; - de->hash_code = name_hash; - de->name_len = cpu_to_le16(namelen); - memcpy(dentry_blk->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); - for (i = 0; i < slots; i++) - test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + name_hash = f2fs_dentry_hash(name); + make_dentry_ptr(&d, (void *)dentry_blk, 2); + f2fs_update_dentry(inode, &d, name, name_hash, bit_pos); + set_page_dirty(ipage); /* we don't need to mark_inode_dirty now */ From feb0586fb34d25ce189da70c8b414ba9ed845e97 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Feb 2015 16:19:22 +0800 Subject: [PATCH 146/321] f2fs: use ->writepage in sync_meta_pages This patch uses ->writepage of meta mapping in sync_meta_pages instead of f2fs_write_meta_page, by this way, in its caller we can ignore any changes (e.g. changing name) of this registered function. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 02f7e5e3..9b1cfa86 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -276,7 +276,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, if (!clear_page_dirty_for_io(page)) goto continue_unlock; - if (f2fs_write_meta_page(page, &wbc)) { + if (mapping->a_ops->writepage(page, &wbc)) { unlock_page(page); break; } From 1ef78db48b644d6c19c27b0a632975a86aac8a30 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Feb 2015 16:20:27 +0800 Subject: [PATCH 147/321] f2fs: fix incorrectly stat number of inline data inode We should stat inline data information for temp file in f2fs_tmpfile if we enable inline_data feature. Otherwise, inline data stat number will be wrong after this temp file is evicted. Change-Id: Ib34d1418c5e0796ac382fffd9ccc3754e36f972a Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim From 1399380729d95ba5d728366f3afe84a407e7db71 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:46:29 +0800 Subject: [PATCH 148/321] f2fs: move ext_lock out of struct extent_info Move ext_lock out of struct extent_info, then in the following patches we can use variables with struct extent_info type as a parameter to pass pure data. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++------ fs/f2fs/f2fs.h | 6 +----- fs/f2fs/inode.c | 7 +++++++ fs/f2fs/super.c | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e88065e4..d69e7956 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -255,9 +255,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, if (is_inode_flag_set(fi, FI_NO_EXTENT)) return 0; - read_lock(&fi->ext.ext_lock); + read_lock(&fi->ext_lock); if (fi->ext.len == 0) { - read_unlock(&fi->ext.ext_lock); + read_unlock(&fi->ext_lock); return 0; } @@ -281,10 +281,10 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, bh_result->b_size = UINT_MAX; stat_inc_read_hit(inode->i_sb); - read_unlock(&fi->ext.ext_lock); + read_unlock(&fi->ext_lock); return 1; } - read_unlock(&fi->ext.ext_lock); + read_unlock(&fi->ext_lock); return 0; } @@ -306,7 +306,7 @@ void update_extent_cache(struct dnode_of_data *dn) fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; - write_lock(&fi->ext.ext_lock); + write_lock(&fi->ext_lock); start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; @@ -363,7 +363,7 @@ void update_extent_cache(struct dnode_of_data *dn) need_update = true; } end_update: - write_unlock(&fi->ext.ext_lock); + write_unlock(&fi->ext_lock); if (need_update) sync_inode_page(dn); return; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e7f2b187..0c54fc87 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -274,7 +274,6 @@ enum { #define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ struct extent_info { - rwlock_t ext_lock; /* rwlock for consistency */ unsigned int fofs; /* start offset in a file */ u32 blk_addr; /* start block address of the extent */ unsigned int len; /* length of the extent */ @@ -306,6 +305,7 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ + rwlock_t ext_lock; /* rwlock for single extent cache */ struct inode_entry *dirty_dir; /* the pointer of dirty dir */ struct radix_tree_root inmem_root; /* radix tree for inmem pages */ @@ -316,21 +316,17 @@ struct f2fs_inode_info { static inline void get_extent_info(struct extent_info *ext, struct f2fs_extent i_ext) { - write_lock(&ext->ext_lock); ext->fofs = le32_to_cpu(i_ext.fofs); ext->blk_addr = le32_to_cpu(i_ext.blk_addr); ext->len = le32_to_cpu(i_ext.len); - write_unlock(&ext->ext_lock); } static inline void set_raw_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { - read_lock(&ext->ext_lock); i_ext->fofs = cpu_to_le32(ext->fofs); i_ext->blk_addr = cpu_to_le32(ext->blk_addr); i_ext->len = cpu_to_le32(ext->len); - read_unlock(&ext->ext_lock); } struct f2fs_nm_info { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 12028d9b..8d5f8185 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -129,7 +129,10 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; + write_lock(&fi->ext_lock); get_extent_info(&fi->ext, ri->i_ext); + write_unlock(&fi->ext_lock); + get_inline_info(fi, ri); /* check data exist */ @@ -219,7 +222,11 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_links = cpu_to_le32(inode->i_nlink); ri->i_size = cpu_to_le64(i_size_read(inode)); ri->i_blocks = cpu_to_le64(inode->i_blocks); + + read_lock(&F2FS_I(inode)->ext_lock); set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + read_unlock(&F2FS_I(inode)->ext_lock); + set_raw_inline(F2FS_I(inode), ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 96a7d0c0..3742f795 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -378,7 +378,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; - rwlock_init(&fi->ext.ext_lock); + rwlock_init(&fi->ext_lock); init_rwsem(&fi->i_sem); INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); INIT_LIST_HEAD(&fi->inmem_pages); From 006e9d5fb13ed56d65b2a79e65ba098929ab480d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:47:25 +0800 Subject: [PATCH 149/321] f2fs: simplfy a field name in struct f2fs_extent,extent_info Rename a filed name from 'blk_addr' to 'blk' in struct {f2fs_extent,extent_info} as annotation of this field descripts its meaning well to us. By this way, we can avoid long statement in code of following patches. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 13 ++++++------- fs/f2fs/f2fs.h | 6 +++--- include/linux/f2fs_fs.h | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d69e7956..cdefacbf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -265,7 +265,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; - start_blkaddr = fi->ext.blk_addr; + start_blkaddr = fi->ext.blk; if (pgofs >= start_fofs && pgofs <= end_fofs) { unsigned int blkbits = inode->i_sb->s_blocksize_bits; @@ -310,8 +310,8 @@ void update_extent_cache(struct dnode_of_data *dn) start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; - start_blkaddr = fi->ext.blk_addr; - end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; + start_blkaddr = fi->ext.blk; + end_blkaddr = fi->ext.blk + fi->ext.len - 1; /* Drop and initialize the matched extent */ if (fi->ext.len == 1 && fofs == start_fofs) @@ -321,7 +321,7 @@ void update_extent_cache(struct dnode_of_data *dn) if (fi->ext.len == 0) { if (dn->data_blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk_addr = dn->data_blkaddr; + fi->ext.blk = dn->data_blkaddr; fi->ext.len = 1; } goto end_update; @@ -330,7 +330,7 @@ void update_extent_cache(struct dnode_of_data *dn) /* Front merge */ if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { fi->ext.fofs--; - fi->ext.blk_addr--; + fi->ext.blk--; fi->ext.len++; goto end_update; } @@ -348,8 +348,7 @@ void update_extent_cache(struct dnode_of_data *dn) fi->ext.len = fofs - start_fofs; } else { fi->ext.fofs = fofs + 1; - fi->ext.blk_addr = start_blkaddr + - fofs - start_fofs + 1; + fi->ext.blk = start_blkaddr + fofs - start_fofs + 1; fi->ext.len -= fofs - start_fofs + 1; } } else { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0c54fc87..11d90a56 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -275,7 +275,7 @@ enum { struct extent_info { unsigned int fofs; /* start offset in a file */ - u32 blk_addr; /* start block address of the extent */ + u32 blk; /* start block address of the extent */ unsigned int len; /* length of the extent */ }; @@ -317,7 +317,7 @@ static inline void get_extent_info(struct extent_info *ext, struct f2fs_extent i_ext) { ext->fofs = le32_to_cpu(i_ext.fofs); - ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->blk = le32_to_cpu(i_ext.blk); ext->len = le32_to_cpu(i_ext.len); } @@ -325,7 +325,7 @@ static inline void set_raw_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { i_ext->fofs = cpu_to_le32(ext->fofs); - i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->blk = cpu_to_le32(ext->blk); i_ext->len = cpu_to_le32(ext->len); } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 0b9ef60b..ac330cf6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -152,7 +152,7 @@ struct f2fs_orphan_block { */ struct f2fs_extent { __le32 fofs; /* start file offset of the extent */ - __le32 blk_addr; /* start block address of the extent */ + __le32 blk; /* start block address of the extent */ __le32 len; /* lengh of the extent */ } __packed; From d56212c28b2db137ac763171dc422e7b0cc89978 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:50:30 +0800 Subject: [PATCH 150/321] f2fs: introduce f2fs_map_bh to clean codes of check_extent_cache This patch introduces f2fs_map_bh to clean codes of check_extent_cache. v2: o cleanup f2fs_map_bh pointed out by Jaegeuk Kim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdefacbf..78650efc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -245,8 +245,23 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } +static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, + struct extent_info *ei, struct buffer_head *bh_result) +{ + unsigned int blkbits = sb->s_blocksize_bits; + size_t count; + + set_buffer_new(bh_result); + map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); + count = ei->fofs + ei->len - pgofs; + if (count < (UINT_MAX >> blkbits)) + bh_result->b_size = (count << blkbits); + else + bh_result->b_size = UINT_MAX; +} + static int check_extent_cache(struct inode *inode, pgoff_t pgofs, - struct buffer_head *bh_result) + struct extent_info *ei) { struct f2fs_inode_info *fi = F2FS_I(inode); pgoff_t start_fofs, end_fofs; @@ -268,18 +283,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, start_blkaddr = fi->ext.blk; if (pgofs >= start_fofs && pgofs <= end_fofs) { - unsigned int blkbits = inode->i_sb->s_blocksize_bits; - size_t count; - - set_buffer_new(bh_result); - map_bh(bh_result, inode->i_sb, - start_blkaddr + pgofs - start_fofs); - count = end_fofs - pgofs + 1; - if (count < (UINT_MAX >> blkbits)) - bh_result->b_size = (count << blkbits); - else - bh_result->b_size = UINT_MAX; - + *ei = fi->ext; stat_inc_read_hit(inode->i_sb); read_unlock(&fi->ext_lock); return 1; @@ -655,13 +659,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock, int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; pgoff_t pgofs, end_offset; int err = 0, ofs = 1; + struct extent_info ei; bool allocated = false; /* Get the page offset from the block offset(iblock) */ pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); - if (check_extent_cache(inode, pgofs, bh_result)) + if (check_extent_cache(inode, pgofs, &ei)) { + f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result); goto out; + } if (create) f2fs_lock_op(F2FS_I_SB(inode)); From 1b45b896cbfabbafc51d410528f0ec39b61b9e52 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:51:34 +0800 Subject: [PATCH 151/321] f2fs: introduce universal lookup/update interface for extent cache In this patch, we do these jobs: 1. rename {check,update}_extent_cache to {lookup,update}_extent_info; 2. introduce universal lookup/update interface of extent cache: f2fs_{lookup,update}_extent_cache including above two real functions, then export them to function callers. So after above cleanup, we can add new rb-tree based extent cache into exported interfaces. v2: o remove "f2fs_" for inner function {lookup,update}_extent_info suggested by Jaegeuk Kim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 66 ++++++++++++++++++++++++++++------------------ fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/inline.c | 2 +- fs/f2fs/recovery.c | 2 +- 5 files changed, 44 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 78650efc..70e6c35d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -260,20 +260,20 @@ static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, bh_result->b_size = UINT_MAX; } -static int check_extent_cache(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei) +static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) { struct f2fs_inode_info *fi = F2FS_I(inode); pgoff_t start_fofs, end_fofs; block_t start_blkaddr; if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return 0; + return false; read_lock(&fi->ext_lock); if (fi->ext.len == 0) { read_unlock(&fi->ext_lock); - return 0; + return false; } stat_inc_total_hit(inode->i_sb); @@ -286,29 +286,22 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, *ei = fi->ext; stat_inc_read_hit(inode->i_sb); read_unlock(&fi->ext_lock); - return 1; + return true; } read_unlock(&fi->ext_lock); - return 0; + return false; } -void update_extent_cache(struct dnode_of_data *dn) +static bool update_extent_info(struct inode *inode, pgoff_t fofs, + block_t blkaddr) { - struct f2fs_inode_info *fi = F2FS_I(dn->inode); - pgoff_t fofs, start_fofs, end_fofs; + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; int need_update = true; - f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); - - /* Update the page address in the parent node */ - __set_data_blkaddr(dn); - if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return; - - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; + return false; write_lock(&fi->ext_lock); @@ -323,16 +316,16 @@ void update_extent_cache(struct dnode_of_data *dn) /* Initial extent */ if (fi->ext.len == 0) { - if (dn->data_blkaddr != NULL_ADDR) { + if (blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk = dn->data_blkaddr; + fi->ext.blk = blkaddr; fi->ext.len = 1; } goto end_update; } /* Front merge */ - if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { + if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) { fi->ext.fofs--; fi->ext.blk--; fi->ext.len++; @@ -340,7 +333,7 @@ void update_extent_cache(struct dnode_of_data *dn) } /* Back merge */ - if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { + if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) { fi->ext.len++; goto end_update; } @@ -367,9 +360,30 @@ void update_extent_cache(struct dnode_of_data *dn) } end_update: write_unlock(&fi->ext_lock); - if (need_update) + return need_update; +} + +static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + return lookup_extent_info(inode, pgofs, ei); +} + +void f2fs_update_extent_cache(struct dnode_of_data *dn) +{ + struct f2fs_inode_info *fi = F2FS_I(dn->inode); + pgoff_t fofs; + + f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); + + /* Update the page address in the parent node */ + __set_data_blkaddr(dn); + + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + + if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) sync_inode_page(dn); - return; } struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) @@ -665,7 +679,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, /* Get the page offset from the block offset(iblock) */ pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); - if (check_extent_cache(inode, pgofs, &ei)) { + if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) { f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result); goto out; } @@ -832,7 +846,7 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { write_data_page(page, &dn, fio); - update_extent_cache(&dn); + f2fs_update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 11d90a56..51c8ba1d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1507,7 +1507,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); -void update_extent_cache(struct dnode_of_data *); +void f2fs_update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1baefb4f..986e17bb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -445,7 +445,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) continue; dn->data_blkaddr = NULL_ADDR; - update_extent_cache(dn); + f2fs_update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); nr_free++; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index ad23fd44..47f5ee0a 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -122,7 +122,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; write_data_page(page, dn, &fio); - update_extent_cache(dn); + f2fs_update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) inode_dec_dirty_pages(dn->inode); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7c312dd7..667874bf 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -401,7 +401,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); dn.data_blkaddr = dest; - update_extent_cache(&dn); + f2fs_update_extent_cache(&dn); recovered++; } dn.ofs_in_node++; From 2cdc7274e25083e2e45b65ba5c11ce88b2bccc66 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:52:58 +0800 Subject: [PATCH 152/321] f2fs: introduce infra macro and data structure of rb-tree extent cache Introduce infra macro and data structure for rb-tree based extent cache: Macros: * EXT_TREE_VEC_SIZE: indicate vector size for gang lookup in extent tree. * F2FS_MIN_EXTENT_LEN: indicate minimum length of extent managed in cache. * EXTENT_CACHE_SHRINK_NUMBER: indicate number of extent in cache will be shrunk. Basic data structures for extent cache: * struct extent_tree: extent tree entry per inode. * struct extent_node: extent info node linked in extent tree. Besides, adding new extent cache related fields in f2fs_sb_info. Signed-off-by: Chao Yu Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 36 ++++++++++++++++++++++++++++++++---- fs/f2fs/node.h | 1 + 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 51c8ba1d..7e46b3c0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -270,13 +270,33 @@ enum { #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ +/* vector size for gang look-up from extent cache that consists of radix tree */ +#define EXT_TREE_VEC_SIZE 64 + /* for in-memory extent cache entry */ -#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ +#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ + +/* number of extent info in extent cache we try to shrink */ +#define EXTENT_CACHE_SHRINK_NUMBER 128 struct extent_info { - unsigned int fofs; /* start offset in a file */ - u32 blk; /* start block address of the extent */ - unsigned int len; /* length of the extent */ + unsigned int fofs; /* start offset in a file */ + u32 blk; /* start block address of the extent */ + unsigned int len; /* length of the extent */ +}; + +struct extent_node { + struct rb_node rb_node; /* rb node located in rb-tree */ + struct list_head list; /* node in global extent list of sbi */ + struct extent_info ei; /* extent info */ +}; + +struct extent_tree { + nid_t ino; /* inode number */ + struct rb_root root; /* root of extent info rb-tree */ + rwlock_t lock; /* protect extent info rb-tree */ + atomic_t refcount; /* reference count of rb-tree */ + unsigned int count; /* # of extent node in rb-tree*/ }; /* @@ -564,6 +584,14 @@ struct f2fs_sb_info { struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ + /* for extent tree cache */ + struct radix_tree_root extent_tree_root;/* cache extent cache entries */ + struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ + struct list_head extent_list; /* lru list for shrinker */ + spinlock_t extent_lock; /* locking extent lru list */ + int total_ext_tree; /* extent tree count */ + atomic_t total_ext_node; /* extent info count */ + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index f405bbf2..c56026f1 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -120,6 +120,7 @@ enum mem_type { NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ + EXTENT_CACHE, /* indicates extent cache */ BASE_CHECK, /* check kernel status */ }; From 931bc2e623569d18766d201c033b5346a324e5b9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:54:31 +0800 Subject: [PATCH 153/321] f2fs: add core functions for rb-tree extent cache This patch adds core functions including slab cache init function and init/lookup/update/shrink/destroy function for rb-tree based extent cache. Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail design and implementation of extent cache. Todo: * register rb-based extent cache shrink with mm shrink interface. v2: o move set_extent_info and __is_{extent,back,front}_mergeable into f2fs.h. o introduce __{attach,detach}_extent_node for code readability. o add cond_resched() when fail to invoke kmem_cache_alloc/radix_tree_insert. o fix some coding style and typo issues. v3: o fix oops due to using an unassigned pointer. o use list_del to remove extent node in shrink list. Signed-off-by: Chao Yu Signed-off-by: Changman Lee [Jaegeuk Kim: add static for some funcitons and declare in f2fs.h] Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/f2fs.h Change-Id: Ie1fce6a94cf9025f10421779c6f7f11a540db62b --- fs/f2fs/data.c | 411 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 32 ++++ fs/f2fs/node.c | 9 +- 3 files changed, 451 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 70e6c35d..63b30153 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -24,6 +24,9 @@ #include "segment.h" #include +static struct kmem_cache *extent_tree_slab; +static struct kmem_cache *extent_node_slab; + static void f2fs_read_end_io(struct bio *bio, int err) { struct bio_vec *bvec; @@ -363,6 +366,383 @@ static bool update_extent_info(struct inode *inode, pgoff_t fofs, return need_update; } +static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_info *ei, + struct rb_node *parent, struct rb_node **p) +{ + struct extent_node *en; + + en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); + if (!en) + return NULL; + + en->ei = *ei; + INIT_LIST_HEAD(&en->list); + + rb_link_node(&en->rb_node, parent, p); + rb_insert_color(&en->rb_node, &et->root); + et->count++; + atomic_inc(&sbi->total_ext_node); + return en; +} + +static void __detach_extent_node(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + rb_erase(&en->rb_node, &et->root); + et->count--; + atomic_dec(&sbi->total_ext_node); +} + +static struct extent_node *__lookup_extent_tree(struct extent_tree *et, + unsigned int fofs) +{ + struct rb_node *node = et->root.rb_node; + struct extent_node *en; + + while (node) { + en = rb_entry(node, struct extent_node, rb_node); + + if (fofs < en->ei.fofs) + node = node->rb_left; + else if (fofs >= en->ei.fofs + en->ei.len) + node = node->rb_right; + else + return en; + } + return NULL; +} + +static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + struct extent_node *prev; + struct rb_node *node; + + node = rb_prev(&en->rb_node); + if (!node) + return NULL; + + prev = rb_entry(node, struct extent_node, rb_node); + if (__is_back_mergeable(&en->ei, &prev->ei)) { + en->ei.fofs = prev->ei.fofs; + en->ei.blk = prev->ei.blk; + en->ei.len += prev->ei.len; + __detach_extent_node(sbi, et, prev); + return prev; + } + return NULL; +} + +static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + struct extent_node *next; + struct rb_node *node; + + node = rb_next(&en->rb_node); + if (!node) + return NULL; + + next = rb_entry(node, struct extent_node, rb_node); + if (__is_front_mergeable(&en->ei, &next->ei)) { + en->ei.len += next->ei.len; + __detach_extent_node(sbi, et, next); + return next; + } + return NULL; +} + +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_info *ei, + struct extent_node **den) +{ + struct rb_node **p = &et->root.rb_node; + struct rb_node *parent = NULL; + struct extent_node *en; + + while (*p) { + parent = *p; + en = rb_entry(parent, struct extent_node, rb_node); + + if (ei->fofs < en->ei.fofs) { + if (__is_front_mergeable(ei, &en->ei)) { + f2fs_bug_on(sbi, !den); + en->ei.fofs = ei->fofs; + en->ei.blk = ei->blk; + en->ei.len += ei->len; + *den = __try_back_merge(sbi, et, en); + return en; + } + p = &(*p)->rb_left; + } else if (ei->fofs >= en->ei.fofs + en->ei.len) { + if (__is_back_mergeable(ei, &en->ei)) { + f2fs_bug_on(sbi, !den); + en->ei.len += ei->len; + *den = __try_front_merge(sbi, et, en); + return en; + } + p = &(*p)->rb_right; + } else { + f2fs_bug_on(sbi, 1); + } + } + + return __attach_extent_node(sbi, et, ei, parent, p); +} + +static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, + struct extent_tree *et, bool free_all) +{ + struct rb_node *node, *next; + struct extent_node *en; + unsigned int count = et->count; + + node = rb_first(&et->root); + while (node) { + next = rb_next(node); + en = rb_entry(node, struct extent_node, rb_node); + + if (free_all) { + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) + list_del_init(&en->list); + spin_unlock(&sbi->extent_lock); + } + + if (free_all || list_empty(&en->list)) { + __detach_extent_node(sbi, et, en); + kmem_cache_free(extent_node_slab, en); + } + node = next; + } + + return count - et->count; +} + +static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + struct extent_node *en; + + if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + return false; + + down_read(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); + if (!et) { + up_read(&sbi->extent_tree_lock); + return false; + } + atomic_inc(&et->refcount); + up_read(&sbi->extent_tree_lock); + + read_lock(&et->lock); + en = __lookup_extent_tree(et, pgofs); + if (en) { + *ei = en->ei; + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) + list_move_tail(&en->list, &sbi->extent_list); + spin_unlock(&sbi->extent_lock); + stat_inc_read_hit(sbi->sb); + } + stat_inc_total_hit(sbi->sb); + read_unlock(&et->lock); + + atomic_dec(&et->refcount); + return en ? true : false; +} + +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, + block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + nid_t ino = inode->i_ino; + struct extent_tree *et; + struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; + struct extent_node *den = NULL; + struct extent_info ei, dei; + unsigned int endofs; + + if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + return; + + down_write(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) { + et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); + f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); + memset(et, 0, sizeof(struct extent_tree)); + et->ino = ino; + et->root = RB_ROOT; + rwlock_init(&et->lock); + atomic_set(&et->refcount, 0); + et->count = 0; + sbi->total_ext_tree++; + } + atomic_inc(&et->refcount); + up_write(&sbi->extent_tree_lock); + + write_lock(&et->lock); + + /* 1. lookup and remove existing extent info in cache */ + en = __lookup_extent_tree(et, fofs); + if (!en) + goto update_extent; + + dei = en->ei; + __detach_extent_node(sbi, et, en); + + /* 2. if extent can be split more, split and insert the left part */ + if (dei.len > 1) { + /* insert left part of split extent into cache */ + if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) { + set_extent_info(&ei, dei.fofs, dei.blk, + fofs - dei.fofs); + en1 = __insert_extent_tree(sbi, et, &ei, NULL); + } + + /* insert right part of split extent into cache */ + endofs = dei.fofs + dei.len - 1; + if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) { + set_extent_info(&ei, fofs + 1, + fofs - dei.fofs + dei.blk, endofs - fofs); + en2 = __insert_extent_tree(sbi, et, &ei, NULL); + } + } + +update_extent: + /* 3. update extent in extent cache */ + if (blkaddr) { + set_extent_info(&ei, fofs, blkaddr, 1); + en3 = __insert_extent_tree(sbi, et, &ei, &den); + } + + /* 4. update in global extent list */ + spin_lock(&sbi->extent_lock); + if (en && !list_empty(&en->list)) + list_del(&en->list); + /* + * en1 and en2 split from en, they will become more and more smaller + * fragments after splitting several times. So if the length is smaller + * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree. + */ + if (en1) + list_add_tail(&en1->list, &sbi->extent_list); + if (en2) + list_add_tail(&en2->list, &sbi->extent_list); + if (en3) { + if (list_empty(&en3->list)) + list_add_tail(&en3->list, &sbi->extent_list); + else + list_move_tail(&en3->list, &sbi->extent_list); + } + if (den && !list_empty(&den->list)) + list_del(&den->list); + spin_unlock(&sbi->extent_lock); + + /* 5. release extent node */ + if (en) + kmem_cache_free(extent_node_slab, en); + if (den) + kmem_cache_free(extent_node_slab, den); + + write_unlock(&et->lock); + atomic_dec(&et->refcount); +} + +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; + struct extent_node *en, *tmp; + unsigned long ino = F2FS_ROOT_INO(sbi); + struct radix_tree_iter iter; + void **slot; + unsigned int found; + + if (available_free_memory(sbi, EXTENT_CACHE)) + return; + + spin_lock(&sbi->extent_lock); + list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { + if (!nr_shrink--) + break; + list_del_init(&en->list); + } + spin_unlock(&sbi->extent_lock); + + down_read(&sbi->extent_tree_lock); + while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, + (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { + unsigned i; + + ino = treevec[found - 1]->ino + 1; + for (i = 0; i < found; i++) { + struct extent_tree *et = treevec[i]; + + atomic_inc(&et->refcount); + write_lock(&et->lock); + __free_extent_tree(sbi, et, false); + write_unlock(&et->lock); + atomic_dec(&et->refcount); + } + } + up_read(&sbi->extent_tree_lock); + + down_write(&sbi->extent_tree_lock); + radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter, + F2FS_ROOT_INO(sbi)) { + struct extent_tree *et = (struct extent_tree *)*slot; + + if (!atomic_read(&et->refcount) && !et->count) { + radix_tree_delete(&sbi->extent_tree_root, et->ino); + kmem_cache_free(extent_tree_slab, et); + sbi->total_ext_tree--; + } + } + up_write(&sbi->extent_tree_lock); +} + +void f2fs_destroy_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + + down_read(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); + if (!et) { + up_read(&sbi->extent_tree_lock); + goto out; + } + atomic_inc(&et->refcount); + up_read(&sbi->extent_tree_lock); + + /* free all extent info belong to this extent tree */ + write_lock(&et->lock); + __free_extent_tree(sbi, et, true); + write_unlock(&et->lock); + + atomic_dec(&et->refcount); + + /* try to find and delete extent tree entry in radix tree */ + down_write(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); + if (!et) { + up_write(&sbi->extent_tree_lock); + goto out; + } + f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); + radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); + kmem_cache_free(extent_tree_slab, et); + sbi->total_ext_tree--; + up_write(&sbi->extent_tree_lock); +out: + return; +} + static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { @@ -1254,6 +1634,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, get_data_block); } +void init_extent_cache_info(struct f2fs_sb_info *sbi) +{ + INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); + init_rwsem(&sbi->extent_tree_lock); + INIT_LIST_HEAD(&sbi->extent_list); + spin_lock_init(&sbi->extent_lock); + sbi->total_ext_tree = 0; + atomic_set(&sbi->total_ext_node, 0); +} + +int __init create_extent_cache(void) +{ + extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", + sizeof(struct extent_tree)); + if (!extent_tree_slab) + return -ENOMEM; + extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", + sizeof(struct extent_node)); + if (!extent_node_slab) { + kmem_cache_destroy(extent_tree_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_extent_cache(void) +{ + kmem_cache_destroy(extent_node_slab); + kmem_cache_destroy(extent_tree_slab); +} + const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, .readpages = f2fs_read_data_pages, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7e46b3c0..b677e3e5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -349,6 +349,33 @@ static inline void set_raw_extent(struct extent_info *ext, i_ext->len = cpu_to_le32(ext->len); } +static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, + u32 blk, unsigned int len) +{ + ei->fofs = fofs; + ei->blk = blk; + ei->len = len; +} + +static inline bool __is_extent_mergeable(struct extent_info *back, + struct extent_info *front) +{ + return (back->fofs + back->len == front->fofs && + back->blk + back->len == front->blk); +} + +static inline bool __is_back_mergeable(struct extent_info *cur, + struct extent_info *back) +{ + return __is_extent_mergeable(back, cur); +} + +static inline bool __is_front_mergeable(struct extent_info *cur, + struct extent_info *front) +{ + return __is_extent_mergeable(cur, front); +} + struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ @@ -1535,6 +1562,8 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); +void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); +void f2fs_destroy_extent_tree(struct inode *); void f2fs_update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); @@ -1542,6 +1571,9 @@ struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct page *, struct f2fs_io_info *); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); void f2fs_invalidate_page(struct page *, unsigned long); +void init_extent_cache_info(struct f2fs_sb_info *); +int __init create_extent_cache(void); +void destroy_extent_cache(void); int f2fs_release_page(struct page *, gfp_t); /* diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ede8f815..69417786 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -40,7 +40,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) /* only uses low memory */ avail_ram = val.totalram - val.totalhigh; - /* give 25%, 25%, 50%, 50% memory for each components respectively */ + /* + * give 25%, 25%, 50%, 50%, 50% memory for each components respectively + */ if (type == FREE_NIDS) { mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> PAGE_CACHE_SHIFT; @@ -61,6 +63,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size += (sbi->im[i].ino_num * sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == EXTENT_CACHE) { + mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) + + atomic_read(&sbi->total_ext_node) * + sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else { if (sbi->sb->s_bdi->dirty_exceeded) return false; From cb5eba5fef8da96d83cd71ea79356c0d30b86304 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Oct 2014 22:47:03 -0700 Subject: [PATCH 154/321] f2fs: introduce -o fastboot for reducing booting time only If a system wants to reduce the booting time as a top priority, now we can use a mount option, -o fastboot. With this option, f2fs conducts a little bit slow write_checkpoint, but it can avoid the node page reads during the next mount time. Change-Id: Ib9769eb160b2fe33caee5892357fadb91b83fd77 Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 3 +++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/file.c | 2 ++ fs/f2fs/gc.c | 4 +--- fs/f2fs/super.c | 7 +++++++ 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 199649a2..315bfba3 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -135,6 +135,9 @@ nobarrier This option can be used if underlying storage guarantees If this option is set, no cache_flush commands are issued but f2fs still guarantees the write ordering of all the data writes. +fastboot This option is used when a system wants to reduce mount + time as much as possible, even though normal performance + can be sacrificed. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b677e3e5..964974d7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -49,6 +49,7 @@ #define F2FS_MOUNT_INLINE_DENTRY 0x00000200 #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 #define F2FS_MOUNT_NOBARRIER 0x00000800 +#define F2FS_MOUNT_FASTBOOT 0x00001000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -824,6 +825,8 @@ static inline int __get_cp_reason(struct f2fs_sb_info *sbi) { int reason = CP_SYNC; + if (test_opt(sbi, FASTBOOT)) + reason = CP_FASTBOOT; if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) reason = CP_UMOUNT; return reason; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 986e17bb..4c97b52d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -128,6 +128,8 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) need_cp = true; + else if (test_opt(sbi, FASTBOOT)) + need_cp = true; return need_cp; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 788619f2..76adbc36 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -692,9 +692,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) int gc_type = BG_GC; int nfree = 0; int ret = -1; - struct cp_control cpc = { - .reason = CP_SYNC, - }; + struct cp_control cpc; struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), .iroot = RADIX_TREE_INIT(GFP_NOFS), diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3742f795..caa93c3a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -54,6 +54,7 @@ enum { Opt_inline_dentry, Opt_flush_merge, Opt_nobarrier, + Opt_fastboot, Opt_err, }; @@ -73,6 +74,7 @@ static match_table_t f2fs_tokens = { {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, + {Opt_fastboot, "fastboot"}, {Opt_err, NULL}, }; @@ -353,6 +355,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; + case Opt_fastboot: + set_opt(sbi, FASTBOOT); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -581,6 +586,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) seq_puts(seq, ",nobarrier"); + if (test_opt(sbi, FASTBOOT)) + seq_puts(seq, ",fastboot"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; From 1a1630b4aabae2ab4f78a25776111a79f56451d3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Jan 2015 11:45:33 -0800 Subject: [PATCH 155/321] f2fs: split UMOUNT and FASTBOOT flags This patch adds FASTBOOT flag into checkpoint as follows. - CP_UMOUNT_FLAG is set when system is umounted. - CP_FASTBOOT_FLAG is set when intermediate checkpoint having node summaries was done. So, if you get CP_UMOUNT_FLAG from checkpoint, the system was umounted cleanly. Instead, if there was sudden-power-off, you can get CP_FASTBOOT_FLAG or nothing. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/checkpoint.c fs/f2fs/f2fs.h fs/f2fs/super.c Change-Id: I5a0b8ff3ffcbaa47bce323cbc260f3c5af4f7a4b --- fs/f2fs/checkpoint.c | 5 +++++ fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/super.c | 1 + include/linux/f2fs_fs.h | 1 + include/trace/events/f2fs.h | 1 + 5 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9b1cfa86..2716d15d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -952,6 +952,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + if (cpc->reason == CP_FASTBOOT) + set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + else + clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 964974d7..ea0ec705 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -100,6 +100,7 @@ enum { enum { CP_UMOUNT, + CP_FASTBOOT, CP_SYNC, CP_DISCARD, }; @@ -834,12 +835,13 @@ static inline int __get_cp_reason(struct f2fs_sb_info *sbi) static inline bool __remain_node_summaries(int reason) { - return (reason == CP_UMOUNT); + return (reason == CP_UMOUNT || reason == CP_FASTBOOT); } static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) { - return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)); + return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) || + is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG)); } /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index caa93c3a..3694619f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -494,6 +494,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) struct cp_control cpc; cpc.reason = __get_cp_reason(sbi); + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ac330cf6..d69f35b7 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -91,6 +91,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_FASTBOOT_FLAG 0x00000020 #define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 #define CP_COMPACT_SUM_FLAG 0x00000004 diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index d38db971..b83cc022 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -72,6 +72,7 @@ #define show_cpreason(type) \ __print_symbolic(type, \ { CP_UMOUNT, "Umount" }, \ + { CP_FASTBOOT, "Fastboot" }, \ { CP_SYNC, "Sync" }, \ { CP_DISCARD, "Discard" }) From 0d07e6dd872fa5bee30e9a8e7cde1fbc28d23e51 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:55:51 +0800 Subject: [PATCH 156/321] f2fs: add a mount option for rb-tree extent cache This patch adds a mount option 'extent_cache' in f2fs. It is try to use a rb-tree based extent cache to cache more mapping information with less memory if this option is set, otherwise we will use the original one extent info cache. Suggested-by: Changman Lee Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 4 ++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 7 +++++++ 3 files changed, 12 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 315bfba3..83272002 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -138,6 +138,10 @@ nobarrier This option can be used if underlying storage guarantees fastboot This option is used when a system wants to reduce mount time as much as possible, even though normal performance can be sacrificed. +extent_cache Enable an extent cache based on rb-tree, it can cache + as many as extent which map between contiguous logical + address and physical address per inode, resulting in + increasing the cache hit ratio. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ea0ec705..8e9b8439 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -50,6 +50,7 @@ #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 #define F2FS_MOUNT_NOBARRIER 0x00000800 #define F2FS_MOUNT_FASTBOOT 0x00001000 +#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3694619f..7e89633e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -55,6 +55,7 @@ enum { Opt_flush_merge, Opt_nobarrier, Opt_fastboot, + Opt_extent_cache, Opt_err, }; @@ -75,6 +76,7 @@ static match_table_t f2fs_tokens = { {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, {Opt_fastboot, "fastboot"}, + {Opt_extent_cache, "extent_cache"}, {Opt_err, NULL}, }; @@ -358,6 +360,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_fastboot: set_opt(sbi, FASTBOOT); break; + case Opt_extent_cache: + set_opt(sbi, EXTENT_CACHE); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -589,6 +594,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",nobarrier"); if (test_opt(sbi, FASTBOOT)) seq_puts(seq, ",fastboot"); + if (test_opt(sbi, EXTENT_CACHE)) + seq_puts(seq, ",extent_cache"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; From c1946a18c5c7f666501e57de6cfefa16e677653b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:57:31 +0800 Subject: [PATCH 157/321] f2fs: enable rb-tree extent cache This patch enables rb-tree based extent cache in f2fs. When we mount with "-o extent_cache", f2fs will try to add recently accessed page-block mappings into rb-tree based extent cache as much as possible, instead of original one extent info cache. By this way, f2fs can support more effective cache between dnode page cache and disk. It will supply high hit ratio in the cache with fewer memory when dnode page cache are reclaimed in environment of low memory. Storage: Sandisk sd card 64g 1.append write file (offset: 0, size: 128M); 2.override write file (offset: 2M, size: 1M); 3.override write file (offset: 4M, size: 1M); ... 4.override write file (offset: 48M, size: 1M); ... 5.override write file (offset: 112M, size: 1M); 6.sync 7.echo 3 > /proc/sys/vm/drop_caches 8.read file (size:128M, unit: 4k, count: 32768) (time dd if=/mnt/f2fs/128m bs=4k count=32768) Extent Hit Ratio: before patched Hit Ratio 121 / 1071 1071 / 1071 Performance: before patched real 0m37.051s 0m35.556s user 0m0.040s 0m0.026s sys 0m2.990s 0m2.251s Memory Cost: before patched Tree Count: 0 1 (size: 24 bytes) Node Count: 0 45 (size: 1440 bytes) v3: o retest and given more details of test result. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 13 +++++++++++++ fs/f2fs/inode.c | 1 + fs/f2fs/segment.c | 3 +++ fs/f2fs/super.c | 9 ++++++++- 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 63b30153..42c85be4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -663,6 +663,9 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) void **slot; unsigned int found; + if (!test_opt(sbi, EXTENT_CACHE)) + return; + if (available_free_memory(sbi, EXTENT_CACHE)) return; @@ -711,6 +714,9 @@ void f2fs_destroy_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; + if (!test_opt(sbi, EXTENT_CACHE)) + return; + down_read(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); if (!et) { @@ -746,6 +752,9 @@ void f2fs_destroy_extent_tree(struct inode *inode) static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { + if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + return f2fs_lookup_extent_tree(inode, pgofs, ei); + return lookup_extent_info(inode, pgofs, ei); } @@ -762,6 +771,10 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; + if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE)) + return f2fs_update_extent_tree(dn->inode, fofs, + dn->data_blkaddr); + if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) sync_inode_page(dn); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 8d5f8185..744dbd56 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -334,6 +334,7 @@ void f2fs_evict_inode(struct inode *inode) no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); + f2fs_destroy_extent_tree(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d9c5e188..d5f8d648 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -334,6 +334,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { + /* try to shrink extent cache when there is no enough memory */ + f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); + /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || excess_prefree_segs(sbi) || diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7e89633e..e0712b66 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1069,6 +1069,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&sbi->dir_inode_list); spin_lock_init(&sbi->dir_inode_lock); + init_extent_cache_info(sbi); + init_ino_entry_info(sbi); /* setup f2fs internal modules */ @@ -1273,10 +1275,13 @@ static int __init init_f2fs_fs(void) err = create_checkpoint_caches(); if (err) goto free_segment_manager_caches; + err = create_extent_cache(); + if (err) + goto free_checkpoint_caches; f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); if (!f2fs_kset) { err = -ENOMEM; - goto free_checkpoint_caches; + goto free_extent_cache; } err = register_filesystem(&f2fs_fs_type); if (err) @@ -1287,6 +1292,8 @@ static int __init init_f2fs_fs(void) free_kset: kset_unregister(f2fs_kset); +free_extent_cache: + destroy_extent_cache(); free_checkpoint_caches: destroy_checkpoint_caches(); free_segment_manager_caches: From 967f1187ef3d1455b467215be799de314011aed5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:58:28 +0800 Subject: [PATCH 158/321] f2fs: show extent tree, node stat info in debugfs This patch add and show stat info of total memory footprint for extent tree,node in debugfs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++++++ fs/f2fs/f2fs.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index e671373c..1006290e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -35,6 +35,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) /* validation check of the segment numbers */ si->hit_ext = sbi->read_hit_ext; si->total_ext = sbi->total_hit_ext; + si->ext_tree = sbi->total_ext_tree; + si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dirs = sbi->n_dirty_dirs; @@ -185,6 +187,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); + si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); + si->cache_mem += atomic_read(&sbi->total_ext_node) * + sizeof(struct extent_node); si->page_mem = 0; npages = NODE_MAPPING(sbi)->nrpages; @@ -267,6 +272,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - node blocks : %d\n", si->node_blks); seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); + seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); + seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - inmem: %4d, wb: %4d\n", si->inmem_pages, si->wb_pages); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8e9b8439..c6f3d789 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1606,7 +1606,7 @@ struct f2fs_stat_info { struct f2fs_sb_info *sbi; int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; int main_area_segs, main_area_sections, main_area_zones; - int hit_ext, total_ext; + int hit_ext, total_ext, ext_tree, ext_node; int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; From 362678b07c50ee1bf51fca779c4517ca925a3c12 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:59:59 +0800 Subject: [PATCH 159/321] f2fs: add trace for rb-tree extent cache ops This patch adds trace for lookup/update/shrink/destroy ops in rb-tree extent cache. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 ++++- include/trace/events/f2fs.h | 134 ++++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 42c85be4..f2f8cb9d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -530,6 +530,8 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) return false; + trace_f2fs_lookup_extent_tree_start(inode, pgofs); + down_read(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); if (!et) { @@ -552,6 +554,8 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, stat_inc_total_hit(sbi->sb); read_unlock(&et->lock); + trace_f2fs_lookup_extent_tree_end(inode, pgofs, en); + atomic_dec(&et->refcount); return en ? true : false; } @@ -570,6 +574,8 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) return; + trace_f2fs_update_extent_tree(inode, fofs, blkaddr); + down_write(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, ino); if (!et) { @@ -662,6 +668,7 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) struct radix_tree_iter iter; void **slot; unsigned int found; + unsigned int node_cnt = 0, tree_cnt = 0; if (!test_opt(sbi, EXTENT_CACHE)) return; @@ -688,7 +695,7 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) atomic_inc(&et->refcount); write_lock(&et->lock); - __free_extent_tree(sbi, et, false); + node_cnt += __free_extent_tree(sbi, et, false); write_unlock(&et->lock); atomic_dec(&et->refcount); } @@ -704,15 +711,19 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) radix_tree_delete(&sbi->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); sbi->total_ext_tree--; + tree_cnt++; } } up_write(&sbi->extent_tree_lock); + + trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); } void f2fs_destroy_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; + unsigned int node_cnt = 0; if (!test_opt(sbi, EXTENT_CACHE)) return; @@ -728,7 +739,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) /* free all extent info belong to this extent tree */ write_lock(&et->lock); - __free_extent_tree(sbi, et, true); + node_cnt = __free_extent_tree(sbi, et, true); write_unlock(&et->lock); atomic_dec(&et->refcount); @@ -746,6 +757,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) sbi->total_ext_tree--; up_write(&sbi->extent_tree_lock); out: + trace_f2fs_destroy_extent_tree(inode, node_cnt); return; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index b83cc022..518e5d4b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1008,6 +1008,140 @@ TRACE_EVENT(f2fs_issue_flush, __entry->nobarrier ? "skip (nobarrier)" : "issue", __entry->flush_merge ? " with flush_merge" : "") ); + +TRACE_EVENT(f2fs_lookup_extent_tree_start, + + TP_PROTO(struct inode *inode, unsigned int pgofs), + + TP_ARGS(inode, pgofs), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u", + show_dev_ino(__entry), + __entry->pgofs) +); + +TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, + + TP_PROTO(struct inode *inode, unsigned int pgofs, + struct extent_node *en), + + TP_ARGS(inode, pgofs, en), + + TP_CONDITION(en), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(unsigned int, fofs) + __field(u32, blk) + __field(unsigned int, len) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->fofs = en->ei.fofs; + __entry->blk = en->ei.blk; + __entry->len = en->ei.len; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " + "ext_info(fofs: %u, blk: %u, len: %u)", + show_dev_ino(__entry), + __entry->pgofs, + __entry->fofs, + __entry->blk, + __entry->len) +); + +TRACE_EVENT(f2fs_update_extent_tree, + + TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr), + + TP_ARGS(inode, pgofs, blkaddr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(u32, blk) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->blk = blkaddr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, blkaddr = %u", + show_dev_ino(__entry), + __entry->pgofs, + __entry->blk) +); + +TRACE_EVENT(f2fs_shrink_extent_tree, + + TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt, + unsigned int tree_cnt), + + TP_ARGS(sbi, node_cnt, tree_cnt), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, node_cnt) + __field(unsigned int, tree_cnt) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->node_cnt = node_cnt; + __entry->tree_cnt = tree_cnt; + ), + + TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u", + show_dev(__entry), + __entry->node_cnt, + __entry->tree_cnt) +); + +TRACE_EVENT(f2fs_destroy_extent_tree, + + TP_PROTO(struct inode *inode, unsigned int node_cnt), + + TP_ARGS(inode, node_cnt), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, node_cnt) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->node_cnt = node_cnt; + ), + + TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u", + show_dev_ino(__entry), + __entry->node_cnt) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ From 5addfbcb778e1623fd91db025c1d5f9c6d79f3c0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 18:01:39 +0800 Subject: [PATCH 160/321] f2fs: support fast lookup in extent cache This patch adds a fast lookup path for rb-tree extent cache. In this patch we add a recently accessed extent node pointer 'cached_en' in extent tree. In lookup path of extent cache, we will firstly lookup the last accessed extent node which cached_en points, if we do not hit in this node, we will try to lookup extent node in rb-tree. By this way we can avoid unnecessary slow lookup in rb-tree sometimes. Note that, side-effect of this patch is that we will increase memory cost, because we will store a pointer variable in each struct extent tree additionally. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 19 ++++++++++++++++--- fs/f2fs/f2fs.h | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f2f8cb9d..59a34a96 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -392,6 +392,9 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, rb_erase(&en->rb_node, &et->root); et->count--; atomic_dec(&sbi->total_ext_node); + + if (et->cached_en == en) + et->cached_en = NULL; } static struct extent_node *__lookup_extent_tree(struct extent_tree *et, @@ -400,15 +403,24 @@ static struct extent_node *__lookup_extent_tree(struct extent_tree *et, struct rb_node *node = et->root.rb_node; struct extent_node *en; + if (et->cached_en) { + struct extent_info *cei = &et->cached_en->ei; + + if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) + return et->cached_en; + } + while (node) { en = rb_entry(node, struct extent_node, rb_node); - if (fofs < en->ei.fofs) + if (fofs < en->ei.fofs) { node = node->rb_left; - else if (fofs >= en->ei.fofs + en->ei.len) + } else if (fofs >= en->ei.fofs + en->ei.len) { node = node->rb_right; - else + } else { + et->cached_en = en; return en; + } } return NULL; } @@ -584,6 +596,7 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; et->root = RB_ROOT; + et->cached_en = NULL; rwlock_init(&et->lock); atomic_set(&et->refcount, 0); et->count = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c6f3d789..aff18dca 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -297,6 +297,7 @@ struct extent_node { struct extent_tree { nid_t ino; /* inode number */ struct rb_root root; /* root of extent info rb-tree */ + struct extent_node *cached_en; /* recently accessed extent node */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t refcount; /* reference count of rb-tree */ unsigned int count; /* # of extent node in rb-tree*/ From 7c21d3e3e9cf26410ff019fd1298193a53a92c22 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 18:02:44 +0800 Subject: [PATCH 161/321] f2fs: switch to check FI_NO_EXTENT in f2fs_{lookup,update}_extent_cache This patch switch to check FI_NO_EXTENT in f2fs_{lookup,update}_extent_cache instead of f2fs_{lookup,update}_extent_tree or {lookup,update}_extent_info. No functionality modification in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 59a34a96..afc5efd4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -270,9 +270,6 @@ static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, pgoff_t start_fofs, end_fofs; block_t start_blkaddr; - if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return false; - read_lock(&fi->ext_lock); if (fi->ext.len == 0) { read_unlock(&fi->ext_lock); @@ -303,9 +300,6 @@ static bool update_extent_info(struct inode *inode, pgoff_t fofs, block_t start_blkaddr, end_blkaddr; int need_update = true; - if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return false; - write_lock(&fi->ext_lock); start_fofs = fi->ext.fofs; @@ -539,9 +533,6 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_tree *et; struct extent_node *en; - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) - return false; - trace_f2fs_lookup_extent_tree_start(inode, pgofs); down_read(&sbi->extent_tree_lock); @@ -583,9 +574,6 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, struct extent_info ei, dei; unsigned int endofs; - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) - return; - trace_f2fs_update_extent_tree(inode, fofs, blkaddr); down_write(&sbi->extent_tree_lock); @@ -777,6 +765,9 @@ void f2fs_destroy_extent_tree(struct inode *inode) static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { + if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + return false; + if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) return f2fs_lookup_extent_tree(inode, pgofs, ei); @@ -793,6 +784,9 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) /* Update the page address in the parent node */ __set_data_blkaddr(dn); + if (is_inode_flag_set(fi, FI_NO_EXTENT)) + return; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; From f74e5bda2dd691d1eb1ca317fc3e61004edc35a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 18:03:40 +0800 Subject: [PATCH 162/321] f2fs: use extent cache for dir We update extent cache for all user inode of f2fs including dir inode, so this patch gives another chance to try to get physical address of page from extent cache for dir inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index afc5efd4..6ccf9b21 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -803,6 +803,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; + struct extent_info ei; int err; struct f2fs_io_info fio = { .type = DATA, @@ -814,6 +815,11 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) return page; f2fs_put_page(page, 0); + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + goto got_it; + } + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) @@ -827,6 +833,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (unlikely(dn.data_blkaddr == NEW_ADDR)) return ERR_PTR(-EINVAL); +got_it: page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -861,6 +868,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; + struct extent_info ei; int err; struct f2fs_io_info fio = { .type = DATA, @@ -871,6 +879,11 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) if (!page) return ERR_PTR(-ENOMEM); + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + goto got_it; + } + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) { @@ -884,6 +897,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) return ERR_PTR(-ENOENT); } +got_it: if (PageUptodate(page)) return page; From 60ed31d805f719c0b964d9a7b006de9b2d8822c9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 23 Feb 2015 19:59:52 -0800 Subject: [PATCH 163/321] f2fs: remove obsolete code This patch removes obsolete code in which summary variable is not needed. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 667874bf..86934be3 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -407,8 +407,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, dn.ofs_in_node++; } - /* write node page in place */ - set_summary(&sum, dn.nid, 0, 0); if (IS_INODE(dn.node_page)) sync_inode_page(&dn); From a6fb34a59c0b3924d2db632eccb41d253d86535e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Feb 2015 18:01:46 -0800 Subject: [PATCH 164/321] f2fs: avoid wrong error during recovery During the roll-forward recovery, -ENOENT for f2fs_iget can be skipped. So, this error value should not be propagated. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 86934be3..f7be540d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -212,8 +212,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); - if (err == -ENOENT) + if (err == -ENOENT) { + err = 0; goto next; + } break; } list_add_tail(&entry->list, head); From 38e45cb918171395cb5d1cbe560a57e5a4e29b26 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 Jan 2015 20:36:04 +0800 Subject: [PATCH 165/321] f2fs: add F2FS_IOC_GETVERSION support In this patch we add the FS_IOC_GETVERSION ioctl for getting i_generation from inode, after that, users can list file's generation number by using "lsattr -v". Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index aff18dca..453875bb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -208,6 +208,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, */ #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS +#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION #define F2FS_IOCTL_MAGIC 0xf5 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4c97b52d..701954be 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -937,6 +937,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) return ret; } +static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + + return put_user(inode->i_generation, (int __user *)arg); +} + static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); @@ -1072,6 +1079,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_getflags(filp, arg); case F2FS_IOC_SETFLAGS: return f2fs_ioc_setflags(filp, arg); + case F2FS_IOC_GETVERSION: + return f2fs_ioc_getversion(filp, arg); case F2FS_IOC_START_ATOMIC_WRITE: return f2fs_ioc_start_atomic_write(filp); case F2FS_IOC_COMMIT_ATOMIC_WRITE: From 601b8972a4d3534d50b1a29eb9a3ef358d759110 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 8 Jan 2015 19:15:53 -0800 Subject: [PATCH 166/321] f2fs: support fs shutdown This patch add an ioctl to shutdown f2fs, which stops all the further block writes after this point. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/file.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 453875bb..99865c40 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -209,6 +209,14 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS #define F2FS_IOC_GETVERSION FS_IOC_GETVERSION +#define FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ + +/* + * Flags for going down operation used by FS_IOC_GOINGDOWN + */ +#define FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ +#define FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ +#define FS_GOING_DOWN_NOSYNC 0x2 /* going down */ #define F2FS_IOCTL_MAGIC 0xf5 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 701954be..ce7fc3d4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1042,6 +1042,41 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) return ret; } +static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct super_block *sb = sbi->sb; + __u32 in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__u32 __user *)arg)) + return -EFAULT; + + switch (in) { + case FS_GOING_DOWN_FULLSYNC: + sb = freeze_bdev(sb->s_bdev); + if (sb && !IS_ERR(sb)) { + f2fs_stop_checkpoint(sbi); + thaw_bdev(sb->s_bdev, sb); + } + break; + case FS_GOING_DOWN_METASYNC: + /* do checkpoint only */ + f2fs_sync_fs(sb, 1); + f2fs_stop_checkpoint(sbi); + break; + case FS_GOING_DOWN_NOSYNC: + f2fs_stop_checkpoint(sbi); + break; + default: + return -EINVAL; + } + return 0; +} + static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1091,6 +1126,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_release_volatile_write(filp); case F2FS_IOC_ABORT_VOLATILE_WRITE: return f2fs_ioc_abort_volatile_write(filp); + case FS_IOC_SHUTDOWN: + return f2fs_ioc_shutdown(filp, arg); case FITRIM: return f2fs_ioc_fitrim(filp, arg); default: From 7153ccfb1089c307f1e27dbebdaa5f40b85cc80b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 8 Jan 2015 19:15:53 -0800 Subject: [PATCH 167/321] f2fs: support fs shutdown This patch add an ioctl to shutdown f2fs, which stops all the further block writes after this point. Change-Id: If4e4ca3777d01b7aacf6427f40aa40174a819144 Signed-off-by: Jaegeuk Kim From 992bb85c89ccf9d17c3f32c20d87762eb41187cc Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 26 Feb 2015 07:57:20 +0800 Subject: [PATCH 168/321] f2fs: introduce macro __cp_payload This patch introduce macro __cp_payload. Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++---- fs/f2fs/f2fs.h | 7 ++++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2716d15d..9caa9f7e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -470,8 +470,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) set_sbi_flag(sbi, SBI_POR_DOING); - start_blk = __start_cp_addr(sbi) + 1 + - le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blkaddr = __start_sum_addr(sbi) - 1; ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); @@ -614,7 +613,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; - unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + unsigned int cp_blks = 1 + __cp_payload(sbi); block_t cp_blk_no; int i; @@ -882,7 +881,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u32 crc32 = 0; void *kaddr; int i; - int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + int cp_payload_blks = __cp_payload(sbi); /* * This avoids to conduct wrong roll-forward operations and uses diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 99865c40..bf093a1b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -981,12 +981,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) return 0; } +static inline block_t __cp_payload(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); +} + static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); int offset; - if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { + if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; else From 8f8aa7991a233484802b13ba86067d179b6e81e0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 26 Feb 2015 07:57:21 +0800 Subject: [PATCH 169/321] f2fs: fix the number of orphan inode blocks cp_pack_start_sum is calculated in do_checkpoint and is equal to cpu_to_le32(1 + cp_payload_blks + orphan_blocks). The number of orphan inode blocks is take advantage of by recover_orphan_inodes to readahead meta pages and recovery inodes. However, current codes forget to reduce the number of cp payload blocks when calculate the number of orphan inode blocks. This patch fix it. Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9caa9f7e..8a0c62cd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -463,7 +463,7 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) void recover_orphan_inodes(struct f2fs_sb_info *sbi) { - block_t start_blk, orphan_blkaddr, i, j; + block_t start_blk, orphan_blocks, i, j; if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return; @@ -471,11 +471,11 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) set_sbi_flag(sbi, SBI_POR_DOING); start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); - orphan_blkaddr = __start_sum_addr(sbi) - 1; + orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); - ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); + ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP); - for (i = 0; i < orphan_blkaddr; i++) { + for (i = 0; i < orphan_blocks; i++) { struct page *page = get_meta_page(sbi, start_blk + i); struct f2fs_orphan_block *orphan_blk; From 0018b40258d51105a02eb38de2d5b9f55896625a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 Feb 2015 19:25:01 -0800 Subject: [PATCH 170/321] f2fs: clear page's up-to-date if block was deallocated If page's on-disk block was deallocated, let's remove up-to-date flag to avoid further access with wrong contents. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- fs/f2fs/dir.c | 1 + fs/f2fs/node.c | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6ccf9b21..dc7a4e03 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1256,8 +1256,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) fio->blk_addr = dn.data_blkaddr; /* This page is already truncated */ - if (fio->blk_addr == NULL_ADDR) + if (fio->blk_addr == NULL_ADDR) { + ClearPageUptodate(page); goto out_writepage; + } set_page_writeback(page); @@ -1352,6 +1354,8 @@ static int f2fs_write_data_page(struct page *page, clear_cold_data(page); out: inode_dec_dirty_pages(inode); + if (err) + ClearPageUptodate(page); unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index ffaf9343..14bff26c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -697,6 +697,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (bit_pos == NR_DENTRY_IN_BLOCK) { truncate_hole(dir, page->index, page->index + 1); clear_page_dirty_for_io(page); + ClearPagePrivate(page); ClearPageUptodate(page); inode_dec_dirty_pages(dir); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 69417786..d5b8f77d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1001,6 +1001,7 @@ static int read_node_page(struct page *page, int rw) get_node_info(sbi, page->index, &ni); if (unlikely(ni.blk_addr == NULL_ADDR)) { + ClearPageUptodate(page); f2fs_put_page(page, 1); return -ENOENT; } @@ -1314,6 +1315,7 @@ static int f2fs_write_node_page(struct page *page, /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { + ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); unlock_page(page); return 0; From 8b97ce0fdf39a6b6ec96107304f964380991d550 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 Feb 2015 19:54:48 -0800 Subject: [PATCH 171/321] f2fs: check its block allocation to avoid producing wrong dirty pages If a page is cached but its block was deallocated, we don't need to make the page dirty again by gc and truncate_partial_data_page. In that case, it needs to check its block allocation all the time instead of giving up-to-date page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index dc7a4e03..0251635c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -810,11 +810,19 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) .rw = sync ? READ_SYNC : READA, }; + /* + * If sync is false, it needs to check its block allocation. + * This is need and triggered by two flows: + * gc and truncate_partial_data_page. + */ + if (!sync) + goto search; + page = find_get_page(mapping, index); if (page && PageUptodate(page)) return page; f2fs_put_page(page, 0); - +search: if (f2fs_lookup_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; goto got_it; From 4715130e7e6ed67f8603d1be7496c978439f6974 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 27 Feb 2015 15:56:16 +0800 Subject: [PATCH 172/321] f2fs: fix block_ops trace point block operations is used to flush all dirty node and dentry blocks in the page cache and suspend ordinary writing activities, however, there are some facts such like cp error or mount read-only etc which lead to block operations can't be invoked. Current trace point print block_ops start premature even if block_ops doesn't have opportunity to execute. This patch fix it by move block_ops trace point just before block_ops. Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8a0c62cd..08b1e420 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1045,8 +1045,6 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); - mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && @@ -1056,6 +1054,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; if (f2fs_readonly(sbi->sb)) goto out; + + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); + if (block_operations(sbi)) goto out; From 7c51a0836fd8414de65eca5f56d50b3d71bef116 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 27 Feb 2015 16:52:50 +0800 Subject: [PATCH 173/321] f2fs: don't need to collect dirty sit entries and flush journal when there's no dirty sit entries Don't need to collect dirty sit entries and flush sit journal to sit entries when there's no dirty sit entries. This patch check dirty_sentries earlier just like flush_nat_entries. Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d5f8d648..a9447600 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1790,6 +1790,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); + if (!sit_i->dirty_sentries) + goto out; + /* * add and account sit entries of dirty bitmap in sit entry * set temporarily @@ -1804,9 +1807,6 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) remove_sits_in_journal(sbi); - if (!sit_i->dirty_sentries) - goto out; - /* * there are two steps to flush sit entries: * #1, flush sit entries to journal in current cold data summary block. From f1aaed755ba8c52a80363d6f0f11fcdb87e44a1b Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 27 Feb 2015 17:38:13 +0800 Subject: [PATCH 174/321] f2fs: fix max orphan inodes calculation cp_payload is introduced for sit bitmap to support large volume, and it is just after the block of f2fs_checkpoint + nat bitmap, so the first segment should include F2FS_CP_PACKS + NR_CURSEG_TYPE + cp_payload + orphan blocks. However, current max orphan inodes calculation don't consider cp_payload, this patch fix it by reducing the number of cp_payload from total blocks of the first segment when calculate max orphan inodes. Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 08b1e420..60c5ea88 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1102,13 +1102,15 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) } /* - * considering 512 blocks in a segment 8 blocks are needed for cp - * and log segment summaries. Remaining blocks are used to keep - * orphan entries with the limitation one reserved segment - * for cp pack we can have max 1020*504 orphan entries + * considering 512 blocks in a segment 8+cp_payload blocks are + * needed for cp and log segment summaries. Remaining blocks are + * used to keep orphan entries with the limitation one reserved + * segment for cp pack we can have max 1020*(504-cp_payload) + * orphan entries */ sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; + NR_CURSEG_TYPE - __cp_payload(sbi)) * + F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) From 5a9ce5642a121801519ab64f8095f277b77de706 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 27 Feb 2015 13:13:14 +0100 Subject: [PATCH 175/321] f2fs: add cond_resched() to sync_dirty_dir_inodes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a preempt-off enviroment a alot of FS activity (write/delete) I run into a CPU stall: | NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [kworker/u2:2:59] | Modules linked in: | CPU: 0 PID: 59 Comm: kworker/u2:2 Tainted: G W 3.19.0-00010-g10c11c51ffed #153 | Workqueue: writeback bdi_writeback_workfn (flush-179:0) | task: df230000 ti: df23e000 task.ti: df23e000 | PC is at __submit_merged_bio+0x6c/0x110 | LR is at f2fs_submit_merged_bio+0x74/0x80 … | [] (gic_handle_irq) from [] (__irq_svc+0x44/0x5c) | Exception stack(0xdf23fb48 to 0xdf23fb90) | fb40: deef3484 ffff0001 ffff0001 00000027 deef3484 00000000 | fb60: deef3440 00000000 de426000 deef34ec deefc440 df23fbb4 df23fbb8 df23fb90 | fb80: c02191f0 c0218fa0 60000013 ffffffff | [] (__irq_svc) from [] (__submit_merged_bio+0x6c/0x110) | [] (__submit_merged_bio) from [] (f2fs_submit_merged_bio+0x74/0x80) | [] (f2fs_submit_merged_bio) from [] (sync_dirty_dir_inodes+0x70/0x78) | [] (sync_dirty_dir_inodes) from [] (write_checkpoint+0x104/0xc10) | [] (write_checkpoint) from [] (f2fs_sync_fs+0x80/0xbc) | [] (f2fs_sync_fs) from [] (f2fs_balance_fs_bg+0x4c/0x68) | [] (f2fs_balance_fs_bg) from [] (f2fs_write_node_pages+0x40/0x110) | [] (f2fs_write_node_pages) from [] (do_writepages+0x34/0x48) | [] (do_writepages) from [] (__writeback_single_inode+0x50/0x228) | [] (__writeback_single_inode) from [] (writeback_sb_inodes+0x1a8/0x378) | [] (writeback_sb_inodes) from [] (__writeback_inodes_wb+0x90/0xc8) | [] (__writeback_inodes_wb) from [] (wb_writeback+0x1dc/0x28c) | [] (wb_writeback) from [] (bdi_writeback_workfn+0x2ac/0x460) | [] (bdi_writeback_workfn) from [] (process_one_work+0x11c/0x3a4) | [] (process_one_work) from [] (worker_thread+0x17c/0x490) | [] (worker_thread) from [] (kthread+0xec/0x100) | [] (kthread) from [] (ret_from_fork+0x14/0x24) As it turns out, the code loops in sync_dirty_dir_inodes() and waits for others to make progress but since it never leaves the CPU there is no progress made. At the time of this stall, there is also a rm process blocked: | rm R running 0 1989 1774 0x00000000 | [] (__schedule) from [] (__cond_resched+0x30/0x4c) | [] (__cond_resched) from [] (_cond_resched+0x4c/0x54) | [] (_cond_resched) from [] (truncate_inode_pages_range+0x1f0/0x5e8) | [] (truncate_inode_pages_range) from [] (truncate_inode_pages+0x28/0x30) | [] (truncate_inode_pages) from [] (truncate_inode_pages_final+0x60/0x64) | [] (truncate_inode_pages_final) from [] (f2fs_evict_inode+0x4c/0x268) | [] (f2fs_evict_inode) from [] (evict+0x94/0x140) | [] (evict) from [] (iput+0xc8/0x134) | [] (iput) from [] (d_delete+0x154/0x180) | [] (d_delete) from [] (vfs_rmdir+0x114/0x12c) | [] (vfs_rmdir) from [] (do_rmdir+0x158/0x168) | [] (do_rmdir) from [] (SyS_unlinkat+0x30/0x3c) | [] (SyS_unlinkat) from [] (ret_fast_syscall+0x0/0x4c) As explained by Jaegeuk Kim: |This inode is the directory (c.f., do_rmdir) causing a infinite loop on |sync_dirty_dir_inodes. |The sync_dirty_dir_inodes tries to flush dirty dentry pages, but if the |inode is under eviction, it submits bios and do it again until eviction |is finished. This patch adds a cond_resched() (as suggested by Jaegeuk) after a BIO is submitted so other thread can make progress. Signed-off-by: Sebastian Andrzej Siewior [Jaegeuk Kim: change fs/f2fs to f2fs in subject as naming convention] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 60c5ea88..c393e166 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -793,6 +793,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) * wribacking dentry pages in the freeing inode. */ f2fs_submit_merged_bio(sbi, DATA, WRITE); + cond_resched(); } goto retry; } From 170dee3207716f66e7a8918bdf93dcea4e6eb84e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Feb 2015 17:23:30 +0800 Subject: [PATCH 176/321] f2fs: fix to issue small discard in real-time mode discard Now in f2fs, we share functions and structures for batch mode and real-time mode discard. For real-time mode discard, in shared function add_discard_addrs, we will use uninitialized trim_minlen in struct cp_control to compare with length of contiguous free blocks to decide whether skipping discard fragmented freespace or not, this makes us ignore small discard sometimes. Fix it. Signed-off-by: Chao Yu Reviewed-by : Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a9447600..72272ba0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -609,7 +609,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); - if (end - start < cpc->trim_minlen) + if (force && end - start < cpc->trim_minlen) continue; __add_discard_entry(sbi, cpc, start, end); From 3cdfc2b56b634bb613711031f43ce29b3a462db4 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Tue, 23 Dec 2014 08:37:39 +0900 Subject: [PATCH 177/321] f2fs: add stat info for moved blocks by background gc This patch is for looking into gc performance of f2fs in detail. Signed-off-by: Changman Lee [Jaegeuk Kim: fix build errors] Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 15 ++++++++++----- fs/f2fs/f2fs.h | 23 +++++++++++++++-------- fs/f2fs/gc.c | 6 +++--- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 1006290e..f5388f37 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -265,11 +265,16 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "CP calls: %d\n", si->cp_count); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); - seq_printf(s, " - data segments : %d\n", si->data_segs); - seq_printf(s, " - node segments : %d\n", si->node_segs); - seq_printf(s, "Try to move %d blocks\n", si->tot_blks); - seq_printf(s, " - data blocks : %d\n", si->data_blks); - seq_printf(s, " - node blocks : %d\n", si->node_blks); + seq_printf(s, " - data segments : %d (%d)\n", + si->data_segs, si->bg_data_segs); + seq_printf(s, " - node segments : %d (%d)\n", + si->node_segs, si->bg_node_segs); + seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks, + si->bg_data_blks + si->bg_node_blks); + seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks, + si->bg_data_blks); + seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, + si->bg_node_blks); seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bf093a1b..2fdfb693 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1633,7 +1633,9 @@ struct f2fs_stat_info { int dirty_count, node_pages, meta_pages; int prefree_count, call_count, cp_count; int tot_segs, node_segs, data_segs, free_segs, free_secs; + int bg_node_segs, bg_data_segs; int tot_blks, data_blks, node_blks; + int bg_data_blks, bg_node_blks; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; @@ -1682,31 +1684,36 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) -#define stat_inc_seg_count(sbi, type) \ +#define stat_inc_seg_count(sbi, type, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ (si)->tot_segs++; \ - if (type == SUM_TYPE_DATA) \ + if (type == SUM_TYPE_DATA) { \ si->data_segs++; \ - else \ + si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ + } else { \ si->node_segs++; \ + si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \ + } \ } while (0) #define stat_inc_tot_blk_count(si, blks) \ (si->tot_blks += (blks)) -#define stat_inc_data_blk_count(sbi, blks) \ +#define stat_inc_data_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->data_blks += (blks); \ + si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \ } while (0) -#define stat_inc_node_blk_count(sbi, blks) \ +#define stat_inc_node_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->node_blks += (blks); \ + si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \ } while (0) int f2fs_build_stats(struct f2fs_sb_info *); @@ -1728,10 +1735,10 @@ void f2fs_destroy_root_stats(void); #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_inplace_blocks(sbi) -#define stat_inc_seg_count(si, type) +#define stat_inc_seg_count(sbi, type, gc_type) #define stat_inc_tot_blk_count(si, blks) -#define stat_inc_data_blk_count(si, blks) -#define stat_inc_node_blk_count(sbi, blks) +#define stat_inc_data_blk_count(sbi, blks, gc_type) +#define stat_inc_node_blk_count(sbi, blks, gc_type) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 76adbc36..ed58211f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -435,7 +435,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, set_page_dirty(node_page); } f2fs_put_page(node_page, 1); - stat_inc_node_blk_count(sbi, 1); + stat_inc_node_blk_count(sbi, 1, gc_type); } if (initial) { @@ -622,7 +622,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (IS_ERR(data_page)) continue; move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); + stat_inc_data_blk_count(sbi, 1, gc_type); } } @@ -680,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, } blk_finish_plug(&plug); - stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); + stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type); stat_inc_call_count(sbi->stat_info); f2fs_put_page(sum_page, 1); From c695ead100f0f8216b073a3ce7e0c330a3dc6cb8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Feb 2015 13:37:39 -0800 Subject: [PATCH 178/321] f2fs: avoid to trigger writepage during POR This patch doesn't make any effect on previous behavior, since f2fs_write_data_page bypasses writing the page during POR. But, the difference is that this patch avoids holding writepages mutex. This is to avoid the following false warning, since this can happen only when mount and shutdown are triggered at the same time. ====================================================== [ INFO: possible circular locking dependency detected ] 4.0.0-rc1+ #3 Tainted: G O ------------------------------------------------------- kworker/u8:0/2270 is trying to acquire lock: (&sbi->gc_mutex){+.+.+.}, at: [] f2fs_balance_fs+0x73/0x90 [f2fs] but task is already holding lock: (&sbi->writepages){+.+...}, at: [] f2fs_write_data_pages+0xcb/0x3a0 [f2fs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&sbi->writepages){+.+...}: [] lock_acquire+0xe1/0x2f0 [] mutex_lock_nested+0x63/0x530 [] f2fs_write_data_pages+0xcb/0x3a0 [f2fs] [] do_writepages+0x21/0x50 [] __writeback_single_inode+0x76/0xbf0 [] writeback_single_inode+0xea/0x1c0 [] write_inode_now+0x95/0xa0 [] iput+0x20b/0x3f0 [] recover_data.constprop.14+0x26b/0xa80 [f2fs] [] recover_fsync_data+0x2b6/0x5e0 [f2fs] [] f2fs_fill_super+0xb24/0xb90 [f2fs] [] mount_bdev+0x1a4/0x1e0 [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x39/0x180 [] vfs_kern_mount+0x6b/0x160 [] do_mount+0x204/0xbe0 [] SyS_mount+0x8b/0xe0 [] system_call_fastpath+0x16/0x1b -> #1 (&sbi->cp_mutex){+.+...}: [] lock_acquire+0xe1/0x2f0 [] mutex_lock_nested+0x63/0x530 [] write_checkpoint+0x42/0x1230 [f2fs] [] f2fs_sync_fs+0x9d/0x2a0 [f2fs] [] sync_filesystem+0x82/0xb0 [] generic_shutdown_super+0x34/0x100 [] kill_block_super+0x27/0x70 [] kill_f2fs_super+0x20/0x30 [f2fs] [] deactivate_locked_super+0x49/0x80 [] deactivate_super+0x4e/0x70 [] cleanup_mnt+0x43/0x90 [] __cleanup_mnt+0x12/0x20 [] task_work_run+0xc4/0xf0 [] do_notify_resume+0x8d/0xa0 [] int_signal+0x12/0x17 -> #0 (&sbi->gc_mutex){+.+.+.}: [] __lock_acquire+0x1ac6/0x1c90 [] lock_acquire+0xe1/0x2f0 [] mutex_lock_nested+0x63/0x530 [] f2fs_balance_fs+0x73/0x90 [f2fs] [] f2fs_write_data_page+0x348/0x5b0 [f2fs] [] __f2fs_writepage+0x1a/0x50 [f2fs] [] write_cache_pages+0x274/0x6f0 [] f2fs_write_data_pages+0xe0/0x3a0 [f2fs] [] do_writepages+0x21/0x50 [] __writeback_single_inode+0x76/0xbf0 [] writeback_sb_inodes+0x32a/0x710 [] __writeback_inodes_wb+0x9f/0xd0 [] wb_writeback+0x3db/0x850 [] bdi_writeback_workfn+0x148/0x980 [] process_one_work+0x1e2/0x840 [] worker_thread+0x121/0x460 [] kthread+0xf8/0x110 [] ret_from_fork+0x7c/0xb0 Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0251635c..769878c4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1405,6 +1405,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; + /* during POR, we don't need to trigger writepage at all. */ + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + diff = nr_pages_to_write(sbi, DATA, wbc); if (!S_ISDIR(inode->i_mode)) { From 71cba682aa46b11b5329cc0ff81bbc937db98532 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 2 Mar 2015 10:48:56 -0800 Subject: [PATCH 179/321] f2fs: clear append/update flags once fsync is done When fsync is done through checkpoint, previous f2fs missed to clear append and update flag. This patch fixes to clear them. This was originally catched by Changman Lee before. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ce7fc3d4..cca6ec3d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -237,6 +237,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * will be used only for fsynced inodes after checkpoint. */ try_to_fix_pino(inode); + clear_inode_flag(fi, FI_APPEND_WRITE); + clear_inode_flag(fi, FI_UPDATE_WRITE); goto out; } sync_nodes: From 538cac4608964ccd89f5644b9c0200f90b125dbc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 2 Mar 2015 16:28:16 -0800 Subject: [PATCH 180/321] f2fs: report -ENOENT for unreached data indices If inode has inline_data, it should report -ENOENT when accessing out-of-bound region. This is used by f2fs_fiemap which treats -ENOENT with no error. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d5b8f77d..03f0ee19 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -500,7 +500,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) /* if inline_data is set, should not report any block indices */ if (f2fs_has_inline_data(dn->inode) && index) { - err = -EINVAL; + err = -ENOENT; f2fs_put_page(npage[0], 1); goto release_out; } From be6ceffcafaed898df255d5d1bb0e6bf3f2903b8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Mar 2015 17:06:55 -0800 Subject: [PATCH 181/321] f2fs: relocate Kconfig from misc filesystems The f2fs has been shipped on many smartphone devices during a couple of years. So, it is worth to relocate Kconfig into main page from misc filesystems for developers to choose it more easily. Change-Id: I28830841a9564f6740342fee05e02814d92bba98 Signed-off-by: Jaegeuk Kim --- fs/Kconfig | 281 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/Kconfig | 2 +- 2 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 fs/Kconfig diff --git a/fs/Kconfig b/fs/Kconfig new file mode 100644 index 00000000..011f4336 --- /dev/null +++ b/fs/Kconfig @@ -0,0 +1,281 @@ +# +# File system configuration +# + +menu "File systems" + +# Use unaligned word dcache accesses +config DCACHE_WORD_ACCESS + bool + +if BLOCK + +source "fs/ext2/Kconfig" +source "fs/ext3/Kconfig" +source "fs/ext4/Kconfig" +source "fs/jbd/Kconfig" +source "fs/jbd2/Kconfig" + +config FS_MBCACHE +# Meta block cache for Extended Attributes (ext2/ext3/ext4) + tristate + default y if EXT2_FS=y && EXT2_FS_XATTR + default y if EXT3_FS=y && EXT3_FS_XATTR + default y if EXT4_FS=y + default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS + +source "fs/reiserfs/Kconfig" +source "fs/jfs/Kconfig" + +source "fs/xfs/Kconfig" +source "fs/gfs2/Kconfig" +source "fs/ocfs2/Kconfig" +source "fs/btrfs/Kconfig" +source "fs/nilfs2/Kconfig" +source "fs/f2fs/Kconfig" + +config FS_DAX + bool "Direct Access (DAX) support" + depends on MMU + depends on !(ARM || MIPS || SPARC) + help + Direct Access (DAX) can be used on memory-backed block devices. + If the block device supports DAX and the filesystem supports DAX, + then you can avoid using the pagecache to buffer I/Os. Turning + on this option will compile in support for DAX; you will need to + mount the filesystem using the -o dax option. + + If you do not have a block device that is capable of using this, + or if unsure, say N. Saying Y will increase the size of the kernel + by about 5kB. + +endif # BLOCK + +# Posix ACL utility routines +# +# Note: Posix ACLs can be implemented without these helpers. Never use +# this symbol for ifdefs in core code. +# +config FS_POSIX_ACL + def_bool n + +config EXPORTFS + tristate + +config FILE_LOCKING + bool "Enable POSIX file locking API" if EXPERT + default y + help + This option enables standard file locking support, required + for filesystems like NFS and for the flock() system + call. Disabling this option saves about 11k. + +source "fs/notify/Kconfig" + +source "fs/quota/Kconfig" + +source "fs/autofs4/Kconfig" +source "fs/fuse/Kconfig" +source "fs/overlayfs/Kconfig" + +menu "Caches" + +source "fs/fscache/Kconfig" +source "fs/cachefiles/Kconfig" + +endmenu + +if BLOCK +menu "CD-ROM/DVD Filesystems" + +source "fs/isofs/Kconfig" +source "fs/udf/Kconfig" + +endmenu +endif # BLOCK + +if BLOCK +menu "DOS/FAT/NT Filesystems" + +source "fs/fat/Kconfig" +source "fs/ntfs/Kconfig" + +endmenu +endif # BLOCK + +menu "Pseudo filesystems" + +source "fs/proc/Kconfig" +source "fs/kernfs/Kconfig" +source "fs/sysfs/Kconfig" + +config TMPFS + bool "Tmpfs virtual memory file system support (former shm fs)" + depends on SHMEM + help + Tmpfs is a file system which keeps all files in virtual memory. + + Everything in tmpfs is temporary in the sense that no files will be + created on your hard drive. The files live in memory and swap + space. If you unmount a tmpfs instance, everything stored therein is + lost. + + See for details. + +config TMPFS_POSIX_ACL + bool "Tmpfs POSIX Access Control Lists" + depends on TMPFS + select TMPFS_XATTR + select FS_POSIX_ACL + help + POSIX Access Control Lists (ACLs) support additional access rights + for users and groups beyond the standard owner/group/world scheme, + and this option selects support for ACLs specifically for tmpfs + filesystems. + + If you've selected TMPFS, it's possible that you'll also need + this option as there are a number of Linux distros that require + POSIX ACL support under /dev for certain features to work properly. + For example, some distros need this feature for ALSA-related /dev + files for sound to work properly. In short, if you're not sure, + say Y. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website . + +config TMPFS_XATTR + bool "Tmpfs extended attributes" + depends on TMPFS + default n + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + for details). + + Currently this enables support for the trusted.* and + security.* namespaces. + + You need this for POSIX ACL support on tmpfs. + + If unsure, say N. + +config HUGETLBFS + bool "HugeTLB file system support" + depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \ + SYS_SUPPORTS_HUGETLBFS || BROKEN + help + hugetlbfs is a filesystem backing for HugeTLB pages, based on + ramfs. For architectures that support it, say Y here and read + for details. + + If unsure, say N. + +config HUGETLB_PAGE + def_bool HUGETLBFS + +source "fs/configfs/Kconfig" +source "fs/efivarfs/Kconfig" + +endmenu + +menuconfig MISC_FILESYSTEMS + bool "Miscellaneous filesystems" + default y + ---help--- + Say Y here to get to see options for various miscellaneous + filesystems, such as filesystems that came from other + operating systems. + + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled; if unsure, say Y here. + +if MISC_FILESYSTEMS + +source "fs/adfs/Kconfig" +source "fs/affs/Kconfig" +source "fs/ecryptfs/Kconfig" +source "fs/hfs/Kconfig" +source "fs/hfsplus/Kconfig" +source "fs/befs/Kconfig" +source "fs/bfs/Kconfig" +source "fs/efs/Kconfig" +source "fs/jffs2/Kconfig" +# UBIFS File system configuration +source "fs/ubifs/Kconfig" +source "fs/logfs/Kconfig" +source "fs/cramfs/Kconfig" +source "fs/squashfs/Kconfig" +source "fs/freevxfs/Kconfig" +source "fs/minix/Kconfig" +source "fs/omfs/Kconfig" +source "fs/hpfs/Kconfig" +source "fs/qnx4/Kconfig" +source "fs/qnx6/Kconfig" +source "fs/romfs/Kconfig" +source "fs/pstore/Kconfig" +source "fs/sysv/Kconfig" +source "fs/ufs/Kconfig" +source "fs/exofs/Kconfig" + +endif # MISC_FILESYSTEMS + +source "fs/exofs/Kconfig.ore" + +menuconfig NETWORK_FILESYSTEMS + bool "Network File Systems" + default y + depends on NET + ---help--- + Say Y here to get to see options for network filesystems and + filesystem-related networking code, such as NFS daemon and + RPCSEC security modules. + + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled; if unsure, say Y here. + +if NETWORK_FILESYSTEMS + +source "fs/nfs/Kconfig" +source "fs/nfsd/Kconfig" + +config GRACE_PERIOD + tristate + +config LOCKD + tristate + depends on FILE_LOCKING + select GRACE_PERIOD + +config LOCKD_V4 + bool + depends on NFSD_V3 || NFS_V3 + depends on FILE_LOCKING + default y + +config NFS_ACL_SUPPORT + tristate + select FS_POSIX_ACL + +config NFS_COMMON + bool + depends on NFSD || NFS_FS || LOCKD + default y + +source "net/sunrpc/Kconfig" +source "fs/ceph/Kconfig" +source "fs/cifs/Kconfig" +source "fs/ncpfs/Kconfig" +source "fs/coda/Kconfig" +source "fs/afs/Kconfig" +source "fs/9p/Kconfig" + +endif # NETWORK_FILESYSTEMS + +source "fs/nls/Kconfig" +source "fs/dlm/Kconfig" + +endmenu diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 67443fe7..ac6b0d78 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -1,5 +1,5 @@ config F2FS_FS - tristate "F2FS filesystem support (EXPERIMENTAL)" + tristate "F2FS filesystem support" depends on BLOCK help F2FS is based on Log-structured File System (LFS), which supports From 2f1372b0f23bb12ea183d2b82f34e74433bc4bd9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 6 Mar 2015 15:00:54 +0800 Subject: [PATCH 182/321] f2fs: fix extent cache memory leak extent tree/node slab cache is created during f2fs insmod, how, it isn't destroyed during f2fs rmmod, this patch fix it by destroy extent tree/node slab cache once rmmod f2fs. Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e0712b66..50a21450 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1311,6 +1311,7 @@ static void __exit exit_f2fs_fs(void) remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); + destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); destroy_node_manager_caches(); From 2204e5b95d3b866325c74bb5c0a77b8938c0b79a Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 6 Mar 2015 15:00:55 +0800 Subject: [PATCH 183/321] f2fs: reduce searching region of segmap when set free section In __set_free we will check whether all segment are free in one section when free one segment, in order to set section to free status. But the searching region of segmap is from start segno to last segno of main area, it's not necessary. So let's just only check all segment bitmap of target section. Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7fd35111..85d7fa75 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -336,7 +336,8 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) clear_bit(segno, free_i->free_segmap); free_i->free_segments++; - next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); + next = find_next_bit(free_i->free_segmap, + start_segno + sbi->segs_per_sec, start_segno); if (next >= start_segno + sbi->segs_per_sec) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; From 481d551d82bb7f871b9458c6899cf218c613f87f Mon Sep 17 00:00:00 2001 From: Yuan Zhong Date: Sat, 7 Mar 2015 10:07:42 +0000 Subject: [PATCH 184/321] f2fs: set the correct place of initializing *res_page The function 'find_in_inline_dir()' contain 'res_page' as an argument. So, we should initiaize 'res_page' before this function. Signed-off-by: Yuan Zhong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 14bff26c..7ceda7dd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -233,14 +233,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct qstr *child, unsigned int max_depth; unsigned int level; + *res_page = NULL; + if (f2fs_has_inline_dentry(dir)) return find_in_inline_dir(dir, child, res_page, flags); if (npages == 0) return NULL; - *res_page = NULL; - name_hash = f2fs_dentry_hash(child); max_depth = F2FS_I(dir)->i_current_depth; From 5497492310ebb86a89683f78fc19d419a4f18561 Mon Sep 17 00:00:00 2001 From: Yuan Zhong Date: Mon, 9 Mar 2015 02:43:50 +0000 Subject: [PATCH 185/321] f2fs: remove unnecessary condition judgment Remove the unnecessary condition judgment, because 'max_slots' has been initialized to '0' at the beginging of the function, as following: if (max_slots) *max_slots = 0; Signed-off-by: Yuan Zhong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7ceda7dd..92614fe2 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -151,7 +151,7 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, goto found; } - if (max_slots && *max_slots >= 0 && max_len > *max_slots) { + if (max_slots && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } From 186942f8753b2963fdb3a047aabe82c8c2e04686 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 9 Mar 2015 08:07:04 +0900 Subject: [PATCH 186/321] f2fs: cleanup statement about max orphan inodes calc Through each macro, we can read the meaning easily. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c393e166..7f61ce9d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1102,13 +1102,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) im->ino_num = 0; } - /* - * considering 512 blocks in a segment 8+cp_payload blocks are - * needed for cp and log segment summaries. Remaining blocks are - * used to keep orphan entries with the limitation one reserved - * segment for cp pack we can have max 1020*(504-cp_payload) - * orphan entries - */ sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE - __cp_payload(sbi)) * F2FS_ORPHANS_PER_BLOCK; From 5406ff5bbd46a3bb721b7f4444d9d0f67fa87136 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 9 Mar 2015 11:00:55 +0800 Subject: [PATCH 187/321] f2fs: fix unlocked nat set cache operation nm_i->nat_tree_lock is used to sync both the operations of nat entry cache tree and nat set cache tree, however, it isn't held when flush nat entries during checkpoint which lead to potential race, this patch fix it by holding the lock when gang lookup nat set cache and delete item from nat set cache. Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 03f0ee19..3a59d73e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1830,6 +1830,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; struct page *page = NULL; + struct f2fs_nm_info *nm_i = NM_I(sbi); /* * there are two steps to flush nat entries: @@ -1883,7 +1884,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, set->entry_cnt); + down_write(&nm_i->nat_tree_lock); radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + up_write(&nm_i->nat_tree_lock); kmem_cache_free(nat_entry_set_slab, set); } @@ -1911,6 +1914,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_set(nm_i, set_idx, SETVEC_SIZE, setvec))) { unsigned idx; @@ -1919,6 +1923,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) __adjust_nat_entry_set(setvec[idx], &sets, MAX_NAT_JENTRIES(sum)); } + up_write(&nm_i->nat_tree_lock); /* flush dirty nats in nat entry set */ list_for_each_entry_safe(set, tmp, &sets, set_list) From eb10500292d48dee1c5dd1d85c8fce06e4071f72 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Mar 2015 17:33:16 +0800 Subject: [PATCH 188/321] f2fs: fix to calculate max length of contiguous free slots correctly When lookuping for creating, we will try to record the level of current dentry hash table if current dentry has enough contiguous slots for storing name of new file which will be created later, this can save our lookup time when add a link into parent dir. But currently in find_target_dentry, our current length of contiguous free slots is not calculated correctly. This make us leaving some holes in dentry block occasionally, it wastes our space of dentry block. Let's refactor the lookup flow for max slots as following to fix this issue: a) increase max_len if current slot is free; b) update max_slots with max_len if max_len is larger than max_slots; c) reset max_len to zero if current slot is not free. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 92614fe2..c00418b1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -133,13 +133,11 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, *max_slots = 0; while (bit_pos < d->max) { if (!test_bit_le(bit_pos, d->bitmap)) { - if (bit_pos == 0) - max_len = 1; - else if (!test_bit_le(bit_pos - 1, d->bitmap)) - max_len++; bit_pos++; + max_len++; continue; } + de = &d->dentry[bit_pos]; if (flags & LOOKUP_NOCASE) { if ((le16_to_cpu(de->name_len) == name->len) && @@ -151,10 +149,9 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, goto found; } - if (max_slots && max_len > *max_slots) { + if (max_slots && max_len > *max_slots) *max_slots = max_len; - max_len = 0; - } + max_len = 0; /* remain bug on condition */ if (unlikely(!de->name_len)) From 7655ddff8af7ca2234be6758f9dae3d34073c064 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2015 13:16:25 +0800 Subject: [PATCH 189/321] f2fs: fix to truncate inline data past EOF Previously if inode is with inline data, we will try to invalid partial inline data in page #0 when we truncate size of inode in truncate_partial_data_page(). And then we set page #0 to dirty, after this we can synchronize inode page with page #0 at ->writepage(). But sometimes we will fail to operate page #0 in truncate_partial_data_page() due to below reason: a) if offset is zero, we will skip setting page #0 to dirty. b) if page #0 is not uptodate, we will fail to update it as it has no mapping data. So with following operations, we will meet recent data which should be truncated. 1.write inline data to file 2.sync first data page to inode page 3.truncate file size to 0 4.truncate file size to max_inline_size 5.echo 1 > /proc/sys/vm/drop_caches 6.read file --> meet original inline data which is remained in inode page. This patch renames truncate_inline_data() to truncate_inline_inode() for code readability, then use truncate_inline_inode() to truncate inline data in inode page in truncate_blocks() and truncate page #0 in truncate_partial_data_page() for fixing. v2: o truncate partially #0 page in truncate_partial_data_page to avoid keeping old data in #0 page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 16 +++++++++++----- fs/f2fs/inline.c | 26 +++++++++++++++++++++----- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2fdfb693..50a4a908 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1762,6 +1762,7 @@ extern struct kmem_cache *inode_entry_slab; */ bool f2fs_may_inline(struct inode *); void read_inline_data(struct page *, struct page *); +bool truncate_inline_inode(struct page *, u64); int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); int f2fs_convert_inline_inode(struct inode *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cca6ec3d..053a700d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -470,15 +470,16 @@ void truncate_data_blocks(struct dnode_of_data *dn) truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } -static int truncate_partial_data_page(struct inode *inode, u64 from) +static int truncate_partial_data_page(struct inode *inode, u64 from, + bool force) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; - if (!offset) + if (!offset && !force) return 0; - page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); + page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force); if (IS_ERR(page)) return 0; @@ -489,7 +490,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from) f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); - set_page_dirty(page); + if (!force) + set_page_dirty(page); out: f2fs_put_page(page, 1); return 0; @@ -503,6 +505,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) pgoff_t free_from; int count = 0, err = 0; struct page *ipage; + bool truncate_page = false; trace_f2fs_truncate_blocks_enter(inode, from); @@ -518,7 +521,10 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } if (f2fs_has_inline_data(inode)) { + if (truncate_inline_inode(ipage, from)) + set_page_dirty(ipage); f2fs_put_page(ipage, 1); + truncate_page = true; goto out; } @@ -549,7 +555,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) /* lastly zero out the first data page */ if (!err) - err = truncate_partial_data_page(inode, from); + err = truncate_partial_data_page(inode, from, truncate_page); trace_f2fs_truncate_blocks_exit(inode, err); return err; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 47f5ee0a..b878f927 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -50,10 +50,26 @@ void read_inline_data(struct page *page, struct page *ipage) SetPageUptodate(page); } -static void truncate_inline_data(struct page *ipage) +bool truncate_inline_inode(struct page *ipage, u64 from) { + void *addr; + + /* + * we should never truncate inline data past max inline data size, + * because we always convert inline inode to normal one before + * truncating real data if new size is past max inline data size. + */ + f2fs_bug_on(F2FS_P_SB(ipage), from > MAX_INLINE_DATA); + + if (from >= MAX_INLINE_DATA) + return false; + + addr = inline_data_addr(ipage); + f2fs_wait_on_page_writeback(ipage, NODE); - memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); + memset(addr + from, 0, MAX_INLINE_DATA - from); + + return true; } int f2fs_read_inline_data(struct inode *inode, struct page *page) @@ -131,7 +147,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_data(dn->inode_page); + truncate_inline_inode(dn->inode_page, 0); clear_out: stat_dec_inline_inode(dn->inode); f2fs_clear_inline_inode(dn->inode); @@ -245,7 +261,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage) if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - truncate_inline_data(ipage); + truncate_inline_inode(ipage, 0); f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); @@ -364,7 +380,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_data(ipage); + truncate_inline_inode(ipage, 0); stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); From 9af79062ec2e98faab33120a13e7012065aef187 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 12 Mar 2015 17:04:24 +0800 Subject: [PATCH 190/321] f2fs: fix to check current blkaddr in __allocate_data_blocks In __allocate_data_blocks, we should check current blkaddr which is located at ofs_in_node of dnode page instead of checking first blkaddr all the time. Otherwise we can only allocate one blkaddr in each dnode page. Fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 769878c4..7b702824 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1063,7 +1063,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); while (dn.ofs_in_node < end_offset && len) { - if (dn.data_blkaddr == NULL_ADDR) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + if (blkaddr == NULL_ADDR) { if (__allocate_data_block(&dn)) goto sync_out; allocated = true; From 440abafe0afab1a6388c89043b4b9393d1e50a6b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Mar 2015 13:42:48 -0400 Subject: [PATCH 191/321] - f2fs: fix to cover sentry_lock for block allocation In the following call stack, f2fs changes the bitmap for dirty segments and # of dirty sentries without grabbing sit_i->sentry_lock. This can result in mismatch on bitmap and # of dirty sentries, since if there are some direct_io operations. In allocate_data_block, - __allocate_new_segments - mutex_lock(&curseg->curseg_mutex); - s_ops->allocate_segment - new_curseg/change_curseg - reset_curseg - __set_sit_entry_type - __mark_sit_entry_dirty - set_bit(dirty_sentries_bitmap) - dirty_sentries++; Change-Id: I37e3b7aa273454a9fbc6e58e0069d33a72c16753 Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 72272ba0..450a2167 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1224,6 +1224,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); /* direct_io'ed data is aligned to the segment for better performance */ if (direct_io && curseg->next_blkoff) @@ -1238,7 +1239,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, */ __add_sum_entry(sbi, type, sum); - mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); stat_inc_block_count(sbi, curseg); From 3872f1324e5974725f01b894fa0c2db4f5decba4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Mar 2015 21:08:44 +0800 Subject: [PATCH 192/321] f2fs: set SBI_NEED_FSCK when encountering exception in recovery This patch tries to set SBI_NEED_FSCK flag into sbi only when we fail to recover in fill_super, so we could skip fscking image when we fail to fill super for other reason. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 50a21450..85aa5bd1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -956,7 +956,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; - bool retry = true; + bool retry = true, need_fsck = false; char *options = NULL; int i; @@ -1145,9 +1145,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_proc; - if (!retry) - set_sbi_flag(sbi, SBI_NEED_FSCK); - /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { /* @@ -1159,8 +1156,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) err = -EROFS; goto free_kobj; } + + if (need_fsck) + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = recover_fsync_data(sbi); if (err) { + need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%ld", err); goto free_kobj; From e5cf80d40f037653ea78b4f6cfd9a2db83389f60 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Mar 2015 23:27:25 -0400 Subject: [PATCH 193/321] f2fs: set buffer_new when new blocks are allocated This patch modifies to call set_buffer_new, if new blocks are allocated. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7b702824..b016dc2f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -254,7 +254,7 @@ static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, unsigned int blkbits = sb->s_blocksize_bits; size_t count; - set_buffer_new(bh_result); + clear_buffer_new(bh_result); map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); count = ei->fofs + ei->len - pgofs; if (count < (UINT_MAX >> blkbits)) @@ -1136,7 +1136,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto put_out; if (dn.data_blkaddr != NULL_ADDR) { - set_buffer_new(bh_result); + clear_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, dn.data_blkaddr); } else if (create) { err = __allocate_data_block(&dn); @@ -1181,6 +1181,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (err) goto sync_out; allocated = true; + set_buffer_new(bh_result); blkaddr = dn.data_blkaddr; } /* Give more consecutive addresses for the readahead */ From 7c1d66621ed2dbfb4643c95cdb9f0de90a49ae56 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 13 Mar 2015 21:44:36 -0700 Subject: [PATCH 194/321] f2fs: enhance multi-threads performance Previously, f2fs_write_data_pages has a mutex, sbi->writepages, to serialize data writes to maximize write bandwidth, while sacrificing multi-threads performance. Practically, however, multi-threads environment is much more important for users. So this patch tries to remove the mutex. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ------- fs/f2fs/f2fs.h | 1 - fs/f2fs/super.c | 1 - 3 files changed, 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b016dc2f..7580b48f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1394,7 +1394,6 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool locked = false; int ret; long diff; @@ -1415,13 +1414,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, DATA, wbc); - if (!S_ISDIR(inode->i_mode)) { - mutex_lock(&sbi->writepages); - locked = true; - } ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); - if (locked) - mutex_unlock(&sbi->writepages); f2fs_submit_merged_bio(sbi, DATA, WRITE); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 50a4a908..15d7067e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -612,7 +612,6 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ - struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 85aa5bd1..8baaf2f6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1017,7 +1017,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->raw_super = raw_super; sbi->raw_super_buf = raw_super_buf; mutex_init(&sbi->gc_mutex); - mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); clear_sbi_flag(sbi, SBI_POR_DOING); From 8b031de791c34b72ae40365c4af96c0bde12cec3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 16 Mar 2015 16:54:52 -0700 Subject: [PATCH 195/321] f2fs: avoid wrong f2fs_bug_on when truncating inline_data This patch removes wrong f2fs_bug_on in truncate_inline_inode. When there is no space, it can happen a corner case where i_isze is over MAX_INLINE_SIZE while its inode is still inline_data. The scenario is 1. write small data into file #A. 2. fill the whole partition to 100%. 3. truncate 4096 on file #A. 4. write data at 8192 offset. --> f2fs_write_begin -> -ENOSPC = f2fs_convert_inline_page -> f2fs_write_failed -> truncate_blocks -> truncate_inline_inode BUG_ON, since i_size is 4096. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index b878f927..1c555389 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -54,13 +54,6 @@ bool truncate_inline_inode(struct page *ipage, u64 from) { void *addr; - /* - * we should never truncate inline data past max inline data size, - * because we always convert inline inode to normal one before - * truncating real data if new size is past max inline data size. - */ - f2fs_bug_on(F2FS_P_SB(ipage), from > MAX_INLINE_DATA); - if (from >= MAX_INLINE_DATA) return false; From e57ba84b5ccca36da2ea17a6bc21c81ad53c614e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 17 Mar 2015 17:16:35 -0700 Subject: [PATCH 196/321] f2fs: avoid punch_hole overhead when releasing volatile data This patch is to avoid some punch_hole overhead when releasing volatile data. If volatile data was not written yet, we just can make the first page as zero. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/f2fs.h | 6 ++++++ fs/f2fs/file.c | 6 ++++++ fs/f2fs/inode.c | 10 ++++++++++ 4 files changed, 24 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7580b48f..b6c06c7d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1288,6 +1288,8 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) write_data_page(page, &dn, fio); f2fs_update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); } out_writepage: f2fs_put_dnode(&dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 15d7067e..83fe46a0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1231,6 +1231,7 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ }; @@ -1339,6 +1340,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); } +static inline bool f2fs_is_first_block_written(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); +} + static inline bool f2fs_is_drop_cache(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 053a700d..ec3a1dce 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -451,6 +451,9 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->data_blkaddr = NULL_ADDR; f2fs_update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); + if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) + clear_inode_flag(F2FS_I(dn->inode), + FI_FIRST_BLOCK_WRITTEN); nr_free++; } if (nr_free) { @@ -1018,6 +1021,9 @@ static int f2fs_ioc_release_volatile_write(struct file *filp) if (!f2fs_is_volatile_file(inode)) return 0; + if (!f2fs_is_first_block_written(inode)) + return truncate_partial_data_page(inode, 0, true); + punch_hole(inode, 0, F2FS_BLKSIZE); return 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 744dbd56..7668bd12 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -50,6 +50,13 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static bool __written_first_block(struct f2fs_inode *ri) +{ + if (ri->i_addr[0] != NEW_ADDR && ri->i_addr[0] != NULL_ADDR) + return true; + return false; +} + static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { @@ -142,6 +149,9 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); + if (__written_first_block(ri)) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + f2fs_put_page(node_page, 1); stat_inc_inline_inode(inode); From 81c876201a06e1ed50be7fe8a3fcd352786fefb0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 17 Mar 2015 17:58:08 -0700 Subject: [PATCH 197/321] f2fs: add some tracepoints to debug volatile and atomic writes Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/f2fs.h | 4 ++++ fs/f2fs/segment.c | 4 ++++ include/trace/events/f2fs.h | 27 ++++++++++++++++++++++++++- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b6c06c7d..7815a894 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1284,9 +1284,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) need_inplace_update(inode))) { rewrite_data_page(page, fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + trace_f2fs_do_write_data_page(page, IPU); } else { write_data_page(page, &dn, fio); f2fs_update_extent_cache(&dn); + trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); if (page->index == 0) set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 83fe46a0..9eb4e4f8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -555,6 +555,10 @@ enum page_type { META, NR_PAGE_TYPE, META_FLUSH, + INMEM, /* the below types are used by tracepoints only. */ + INMEM_DROP, + IPU, + OPU, }; struct f2fs_io_info { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 450a2167..f7aba7ec 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -262,6 +262,8 @@ void register_inmem_page(struct inode *inode, struct page *page) list_add_tail(&new->list, &fi->inmem_pages); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); + + trace_f2fs_register_inmem_page(page, INMEM); } void commit_inmem_pages(struct inode *inode, bool abort) @@ -295,11 +297,13 @@ void commit_inmem_pages(struct inode *inode, bool abort) f2fs_wait_on_page_writeback(cur->page, DATA); if (clear_page_dirty_for_io(cur->page)) inode_dec_dirty_pages(inode); + trace_f2fs_commit_inmem_page(cur->page, INMEM); do_write_data_page(cur->page, &fio); submit_bio = true; } f2fs_put_page(cur->page, 1); } else { + trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); put_page(cur->page); } radix_tree_delete(&fi->inmem_root, cur->page->index); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 518e5d4b..4ad19239 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -14,7 +14,11 @@ { NODE, "NODE" }, \ { DATA, "DATA" }, \ { META, "META" }, \ - { META_FLUSH, "META_FLUSH" }) + { META_FLUSH, "META_FLUSH" }, \ + { INMEM, "INMEM" }, \ + { INMEM_DROP, "INMEM_DROP" }, \ + { IPU, "IN-PLACE" }, \ + { OPU, "OUT-OF-PLACE" }) #define F2FS_BIO_MASK(t) (t & (READA | WRITE_FLUSH_FUA)) #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) @@ -854,6 +858,13 @@ DEFINE_EVENT(f2fs__page, f2fs_writepage, TP_ARGS(page, type) ); +DEFINE_EVENT(f2fs__page, f2fs_do_write_data_page, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + DEFINE_EVENT(f2fs__page, f2fs_readpage, TP_PROTO(struct page *page, int type), @@ -875,6 +886,20 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, TP_ARGS(page, type) ); +DEFINE_EVENT(f2fs__page, f2fs_register_inmem_page, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + +DEFINE_EVENT(f2fs__page, f2fs_commit_inmem_page, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + TRACE_EVENT(f2fs_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type), From b7f59375e382473f37e92707f503023ffcbfac2d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 19:45:05 -0800 Subject: [PATCH 198/321] f2fs: add f2fs_io_tracer support This patch adds: o initial trace.c and trace.h with skeleton functions o Kconfig and Makefile to activate this feature Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 10 ++++++++++ fs/f2fs/Makefile | 1 + fs/f2fs/trace.c | 24 ++++++++++++++++++++++++ fs/f2fs/trace.h | 24 ++++++++++++++++++++++++ 4 files changed, 59 insertions(+) create mode 100644 fs/f2fs/trace.c create mode 100644 fs/f2fs/trace.h diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index ac6b0d78..dc132c54 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -71,3 +71,13 @@ config F2FS_CHECK_FS Enables BUG_ONs which check the filesystem consistency in runtime. If you want to improve the performance, say N. + +config F2FS_IO_TRACE + bool "F2FS IO tracer" + depends on F2FS_FS + depends on FUNCTION_TRACER + help + F2FS IO trace is based on a function trace, which gathers process + information and block IO patterns in the filesystem level. + + If unsure, say N. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 2e35da12..d9239773 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -5,3 +5,4 @@ f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o +f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c new file mode 100644 index 00000000..4e4a0691 --- /dev/null +++ b/fs/f2fs/trace.c @@ -0,0 +1,24 @@ +/* + * f2fs IO tracer + * + * Copyright (c) 2014 Motorola Mobility + * Copyright (c) 2014 Jaegeuk Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#include "f2fs.h" +#include "trace.h" + +void f2fs_trace_pid(struct page *page) +{ +} + +void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) +{ +} diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h new file mode 100644 index 00000000..08856a97 --- /dev/null +++ b/fs/f2fs/trace.h @@ -0,0 +1,24 @@ +/* + * f2fs IO tracer + * + * Copyright (c) 2014 Motorola Mobility + * Copyright (c) 2014 Jaegeuk Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __F2FS_TRACE_H__ +#define __F2FS_TRACE_H__ + +#ifdef CONFIG_F2FS_IO_TRACE +#include + +extern void f2fs_trace_pid(struct page *); +extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); +#else +#define f2fs_trace_pid(p) +#define f2fs_trace_ios(p, i, n) + +#endif +#endif /* __F2FS_TRACE_H__ */ From 02d540d4c61217d87641a71faba77f96049398b1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 19:51:57 -0800 Subject: [PATCH 199/321] f2fs: add key functions for f2fs_io_tracer This patch adds two key functions to trace process ids and IOs. The basic idea is to 1. remain process ids, pids, in page->private. 2. show pids in IO traces. So, later we can retrieve process information according to IO traces. Signed-off-by: Jaegeuk Kim --- fs/f2fs/trace.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/trace.h | 18 +++++++++++ 2 files changed, 104 insertions(+) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 4e4a0691..19f5216b 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -15,10 +15,96 @@ #include "f2fs.h" #include "trace.h" +RADIX_TREE(pids, GFP_NOIO); +struct last_io_info last_io; + +static inline void __print_last_io(void) +{ + if (!last_io.len) + return; + + trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n", + last_io.major, last_io.minor, + last_io.pid, "----------------", + last_io.type, + last_io.fio.rw, last_io.fio.blk_addr, + last_io.len); + memset(&last_io, 0, sizeof(last_io)); +} + +static int __file_type(struct inode *inode, pid_t pid) +{ + if (f2fs_is_atomic_file(inode)) + return __ATOMIC_FILE; + else if (f2fs_is_volatile_file(inode)) + return __VOLATILE_FILE; + else if (S_ISDIR(inode->i_mode)) + return __DIR_FILE; + else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode))) + return __NODE_FILE; + else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode))) + return __META_FILE; + else if (pid) + return __NORMAL_FILE; + else + return __MISC_FILE; +} + void f2fs_trace_pid(struct page *page) { + struct inode *inode = page->mapping->host; + pid_t pid = task_pid_nr(current); + void *p; + + page->private = pid; + + p = radix_tree_lookup(&pids, pid); + if (p == current) + return; + if (p) + radix_tree_delete(&pids, pid); + + f2fs_radix_tree_insert(&pids, pid, current); + + trace_printk("%3x:%3x %4x %-16s\n", + MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), + pid, current->comm); + } void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) { + struct inode *inode; + pid_t pid; + int major, minor; + + if (flush) { + __print_last_io(); + return; + } + + inode = page->mapping->host; + pid = page_private(page); + + major = MAJOR(inode->i_sb->s_dev); + minor = MINOR(inode->i_sb->s_dev); + + if (last_io.major == major && last_io.minor == minor && + last_io.pid == pid && + last_io.type == __file_type(inode, pid) && + last_io.fio.rw == fio->rw && + last_io.fio.blk_addr + last_io.len == fio->blk_addr) { + last_io.len++; + return; + } + + __print_last_io(); + + last_io.major = major; + last_io.minor = minor; + last_io.pid = pid; + last_io.type = __file_type(inode, pid); + last_io.fio = *fio; + last_io.len = 1; + return; } diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 08856a97..aa6663be 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -14,6 +14,24 @@ #ifdef CONFIG_F2FS_IO_TRACE #include +enum file_type { + __NORMAL_FILE, + __DIR_FILE, + __NODE_FILE, + __META_FILE, + __ATOMIC_FILE, + __VOLATILE_FILE, + __MISC_FILE, +}; + +struct last_io_info { + int major, minor; + pid_t pid; + enum file_type type; + struct f2fs_io_info fio; + block_t len; +}; + extern void f2fs_trace_pid(struct page *); extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); #else From 928adea0d9ab0b18283357526b74a3d102f79b8d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 19:58:58 -0800 Subject: [PATCH 200/321] f2fs: activate f2fs_trace_pid This patch activates f2fs_trace_pid. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/checkpoint.c Change-Id: I5eb4cbd1458a8906b6511b29c663d0a4fda9bd83 --- fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/node.c | 2 ++ fs/f2fs/segment.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7f61ce9d..30e3ef84 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -20,6 +20,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include static struct kmem_cache *ino_entry_slab; @@ -303,6 +304,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); SetPagePrivate(page); + f2fs_trace_pid(page); return 1; } return 0; @@ -715,6 +717,7 @@ void update_dirty_page(struct inode *inode, struct page *page) kmem_cache_free(inode_entry_slab, new); out: SetPagePrivate(page); + f2fs_trace_pid(page); } void add_dirty_dir_inode(struct inode *inode) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3a59d73e..32f4934c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -19,6 +19,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) @@ -1381,6 +1382,7 @@ static int f2fs_set_node_page_dirty(struct page *page) __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); + f2fs_trace_pid(page); return 1; } return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f7aba7ec..1e5e0785 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -20,6 +20,7 @@ #include "f2fs.h" #include "segment.h" #include "node.h" +#include "trace.h" #include #define __reverse_ffz(x) __reverse_ffs(~(x)) @@ -240,6 +241,7 @@ void register_inmem_page(struct inode *inode, struct page *page) int err; SetPagePrivate(page); + f2fs_trace_pid(page); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); From 0697853d6602e84ed13f9e9b515bd74eb34670f5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 20:04:08 -0800 Subject: [PATCH 201/321] f2fs: activate f2fs_trace_ios This patch activates f2fs_trace_ios. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/data.c Change-Id: I67bc7de3c6dec046fbbbf2a4c9a579e1acf4ef5c --- fs/f2fs/data.c | 3 +++ fs/f2fs/file.c | 2 ++ fs/f2fs/super.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7815a894..762a55e7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -22,6 +22,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include static struct kmem_cache *extent_tree_slab; @@ -138,6 +139,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, struct bio *bio; trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(page, fio, 0); /* Allocate a new bio */ bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); @@ -186,6 +188,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, } io->last_block_in_bio = fio->blk_addr; + f2fs_trace_ios(page, fio, 0); up_write(&io->io_rwsem); trace_f2fs_submit_page_mbio(page, fio); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ec3a1dce..8526f931 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -26,6 +26,7 @@ #include "segment.h" #include "xattr.h" #include "acl.h" +#include "trace.h" #include static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, @@ -267,6 +268,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + f2fs_trace_ios(NULL, NULL, 1); return ret; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8baaf2f6..1afd69d6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -30,6 +30,7 @@ #include "segment.h" #include "xattr.h" #include "gc.h" +#include "trace.h" #define CREATE_TRACE_POINTS #include @@ -506,6 +507,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) } else { f2fs_balance_fs(sbi); } + f2fs_trace_ios(NULL, NULL, 1); return 0; } From 984f0a3cc05633c3380f49ad7dbed99562d84965 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 5 Nov 2014 20:05:53 -0800 Subject: [PATCH 202/321] f2fs: disable roll-forward when active_logs = 2 The roll-forward mechanism should be activated when the number of active logs is not 2. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ fs/f2fs/segment.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8526f931..311338f6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -131,6 +131,8 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (test_opt(sbi, FASTBOOT)) need_cp = true; + else if (sbi->active_logs == 2) + need_cp = true; return need_cp; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1e5e0785..5fbc72f4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1176,8 +1176,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && !is_cold_node(page)) - return CURSEG_HOT_NODE; + if (IS_DNODE(page) && is_cold_node(page)) + return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } From 22e1b011dfaa19d608e40fcae8bedee521935d57 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 Nov 2014 22:15:31 -0800 Subject: [PATCH 203/321] f2fs: implement -o dirsync If a mount option has dirsync, we should call checkpoint for all the directory operations. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/namei.c Change-Id: Id4c3e5d76004b4a50dfeae50ce4d02e77903596f --- fs/f2fs/namei.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e6582a77..4398cef1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -139,6 +139,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -165,6 +168,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_unlock_op(sbi); d_instantiate(dentry, inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); @@ -234,6 +240,9 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); fail: trace_f2fs_unlink_exit(inode, err); return err; @@ -267,6 +276,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return err; out: handle_failed_inode(inode); @@ -303,6 +315,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) d_instantiate(dentry, inode); unlock_new_inode(inode); + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_fail: @@ -345,8 +359,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -459,6 +477,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; put_out_dir: From 9738ecd31436bb3d8aed47933d9a8a4c61c119d4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Jan 2015 18:33:46 -0800 Subject: [PATCH 204/321] f2fs: support norecovery mount option This patch adds a mount option, norecovery, which is mostly same as disable_roll_forward. The only difference is that norecovery should be activated with read-only mount option. This can be used when user wants to check whether f2fs is mountable or not without any recovery process. (e.g., xfstests/200) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/super.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 83272002..2aec8f3c 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -106,6 +106,8 @@ background_gc=%s Turn on/off cleaning operations, namely garbage Default value for this option is on. So garbage collection is on by default. disable_roll_forward Disable the roll-forward recovery routine +norecovery Disable the roll-forward recovery routine, mounted read- + only (i.e., -o ro,disable_roll_forward) discard Issue discard/TRIM commands when a segment is cleaned. no_heap Disable heap-style segment allocation which finds free segments for data from the beginning of main area, while diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1afd69d6..ddd403d5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -42,6 +42,7 @@ static struct kset *f2fs_kset; enum { Opt_gc_background, Opt_disable_roll_forward, + Opt_norecovery, Opt_discard, Opt_noheap, Opt_user_xattr, @@ -63,6 +64,7 @@ enum { static match_table_t f2fs_tokens = { {Opt_gc_background, "background_gc=%s"}, {Opt_disable_roll_forward, "disable_roll_forward"}, + {Opt_norecovery, "norecovery"}, {Opt_discard, "discard"}, {Opt_noheap, "no_heap"}, {Opt_user_xattr, "user_xattr"}, @@ -291,6 +293,12 @@ static int parse_options(struct super_block *sb, char *options) case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); break; + case Opt_norecovery: + /* this option mounts f2fs with ro */ + set_opt(sbi, DISABLE_ROLL_FORWARD); + if (!f2fs_readonly(sb)) + return -EINVAL; + break; case Opt_discard: set_opt(sbi, DISCARD); break; From a3b3a50b4d079a0b65ba8beb8b92fba2d1f7f45d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Jan 2015 14:07:36 -0800 Subject: [PATCH 205/321] f2fs: add spin_lock to cover radix operations in IO tracer This patch adds spin_lock to cover radix tree operations in IO tracer. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ fs/f2fs/trace.c | 18 +++++++++++++++--- fs/f2fs/trace.h | 2 ++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ddd403d5..5430e98d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1274,6 +1274,8 @@ static int __init init_f2fs_fs(void) { int err; + f2fs_build_trace_ios(); + err = init_inodecache(); if (err) goto fail; diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 19f5216b..92fa38a4 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -15,7 +15,8 @@ #include "f2fs.h" #include "trace.h" -RADIX_TREE(pids, GFP_NOIO); +RADIX_TREE(pids, GFP_ATOMIC); +spinlock_t pids_lock; struct last_io_info last_io; static inline void __print_last_io(void) @@ -58,9 +59,13 @@ void f2fs_trace_pid(struct page *page) page->private = pid; + if (radix_tree_preload(GFP_NOFS)) + return; + + spin_lock(&pids_lock); p = radix_tree_lookup(&pids, pid); if (p == current) - return; + goto out; if (p) radix_tree_delete(&pids, pid); @@ -69,7 +74,9 @@ void f2fs_trace_pid(struct page *page) trace_printk("%3x:%3x %4x %-16s\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), pid, current->comm); - +out: + spin_unlock(&pids_lock); + radix_tree_preload_end(); } void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) @@ -108,3 +115,8 @@ void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) last_io.len = 1; return; } + +void f2fs_build_trace_ios(void) +{ + spin_lock_init(&pids_lock); +} diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index aa6663be..eb39fa08 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -34,9 +34,11 @@ struct last_io_info { extern void f2fs_trace_pid(struct page *); extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); +extern void f2fs_build_trace_ios(void); #else #define f2fs_trace_pid(p) #define f2fs_trace_ios(p, i, n) +#define f2fs_build_trace_ios() #endif #endif /* __F2FS_TRACE_H__ */ From 6eb9e430a2ca43c5c8c279b68939cea85a87699a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Jan 2015 14:09:48 -0800 Subject: [PATCH 206/321] f2fs: add f2fs_destroy_trace_ios to free radix tree This patch removes radix tree after finishing tracing IOs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + fs/f2fs/trace.c | 37 +++++++++++++++++++++++++++++++++++++ fs/f2fs/trace.h | 2 ++ 3 files changed, 40 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5430e98d..fc6857f3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1330,6 +1330,7 @@ static void __exit exit_f2fs_fs(void) destroy_node_manager_caches(); destroy_inodecache(); kset_unregister(f2fs_kset); + f2fs_destroy_trace_ios(); } module_init(init_f2fs_fs) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 92fa38a4..b3570dcc 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "f2fs.h" #include "trace.h" @@ -120,3 +121,39 @@ void f2fs_build_trace_ios(void) { spin_lock_init(&pids_lock); } + +#define PIDVEC_SIZE 128 +static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index, + unsigned int max_items) +{ + struct radix_tree_iter iter; + void **slot; + unsigned int ret = 0; + + if (unlikely(!max_items)) + return 0; + + radix_tree_for_each_slot(slot, &pids, &iter, first_index) { + results[ret] = iter.index; + if (++ret == PIDVEC_SIZE) + break; + } + return ret; +} + +void f2fs_destroy_trace_ios(void) +{ + pid_t pid[PIDVEC_SIZE]; + pid_t next_pid = 0; + unsigned int found; + + spin_lock(&pids_lock); + while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { + unsigned idx; + + next_pid = pid[found - 1] + 1; + for (idx = 0; idx < found; idx++) + radix_tree_delete(&pids, pid[idx]); + } + spin_unlock(&pids_lock); +} diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index eb39fa08..1041dbeb 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -35,10 +35,12 @@ struct last_io_info { extern void f2fs_trace_pid(struct page *); extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); extern void f2fs_build_trace_ios(void); +extern void f2fs_destroy_trace_ios(void); #else #define f2fs_trace_pid(p) #define f2fs_trace_ios(p, i, n) #define f2fs_build_trace_ios() +#define f2fs_destroy_trace_ios() #endif #endif /* __F2FS_TRACE_H__ */ From f8af36c540e1c8c32dfa479103bcd35b03df6710 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 10 Feb 2015 16:23:12 -0800 Subject: [PATCH 207/321] f2fs: fix sparse warnings This patch resolves the following warnings. include/trace/events/f2fs.h:150:1: warning: expression using sizeof bool include/trace/events/f2fs.h:180:1: warning: expression using sizeof bool include/trace/events/f2fs.h:990:1: warning: expression using sizeof bool include/trace/events/f2fs.h:990:1: warning: expression using sizeof bool include/trace/events/f2fs.h:150:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:180:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:990:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:990:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) fs/f2fs/checkpoint.c:27:19: warning: symbol 'inode_entry_slab' was not declared. Should it be static? fs/f2fs/checkpoint.c:577:15: warning: cast to restricted __le32 fs/f2fs/checkpoint.c:592:15: warning: cast to restricted __le32 fs/f2fs/trace.c:19:1: warning: symbol 'pids' was not declared. Should it be static? fs/f2fs/trace.c:21:21: warning: symbol 'last_io' was not declared. Should it be static? Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/trace.c Change-Id: I2df2063f06c00d8903023b6116ae6485f3963d9a --- fs/f2fs/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index b3570dcc..875aa817 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -16,9 +16,9 @@ #include "f2fs.h" #include "trace.h" -RADIX_TREE(pids, GFP_ATOMIC); -spinlock_t pids_lock; -struct last_io_info last_io; +static RADIX_TREE(pids, GFP_ATOMIC); +static spinlock_t pids_lock; +static struct last_io_info last_io; static inline void __print_last_io(void) { From 86175047dfc3722e11229b181838b16ce72ec0e3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Oct 2014 19:01:10 -0700 Subject: [PATCH 208/321] f2fs: remove unnecessary macro Let's remove unused macro. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d69f35b7..502f28cf 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -182,10 +182,6 @@ struct f2fs_extent { #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) -\ - sizeof(__le32) * (DEF_ADDRS_PER_INODE + \ - DEF_NIDS_PER_INODE - 1)) - struct f2fs_inode { __le16 i_mode; /* file mode */ __u8 i_advise; /* file hints */ From 4911cccfc13d1452bec06ad8f41c6772e7b550f6 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 19 Mar 2015 13:23:48 +0800 Subject: [PATCH 209/321] f2fs: enable fast symlink by utilizing inline data Fast symlink can utilize inline data flow to avoid using any i_addr region, since we need to handle many cases such as truncation, roll-forward recovery, and fsck/dump tools. Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1c555389..02daa3cf 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -21,7 +21,7 @@ bool f2fs_may_inline(struct inode *inode) if (f2fs_is_atomic_file(inode)) return false; - if (!S_ISREG(inode->i_mode)) + if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; if (i_size_read(inode) > MAX_INLINE_DATA) From 5acc6bcfec2473393786af204dd8713416b3c4ba Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2015 19:23:32 +0800 Subject: [PATCH 210/321] f2fs: split set_data_blkaddr from f2fs_update_extent_cache Split __set_data_blkaddr from f2fs_update_extent_cache for readability. Additionally rename __set_data_blkaddr to set_data_blkaddr for exporting. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++------ fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 1 + fs/f2fs/inline.c | 1 + fs/f2fs/recovery.c | 1 + 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 762a55e7..93a22b0e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -200,7 +200,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, * ->node_page * update block addresses in the node page */ -static void __set_data_blkaddr(struct dnode_of_data *dn) +void set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn; __le32 *addr_array; @@ -229,7 +229,7 @@ int reserve_new_block(struct dnode_of_data *dn) trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); dn->data_blkaddr = NEW_ADDR; - __set_data_blkaddr(dn); + set_data_blkaddr(dn); mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; @@ -784,9 +784,6 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); - /* Update the page address in the parent node */ - __set_data_blkaddr(dn); - if (is_inode_flag_set(fi, FI_NO_EXTENT)) return; @@ -1032,7 +1029,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ - __set_data_blkaddr(dn); + set_data_blkaddr(dn); /* update i_size */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + @@ -1290,6 +1287,7 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) trace_f2fs_do_write_data_page(page, IPU); } else { write_data_page(page, &dn, fio); + set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9eb4e4f8..e6ac6c3d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1590,6 +1590,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); +void set_data_blkaddr(struct dnode_of_data *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 311338f6..03059664 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -453,6 +453,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) continue; dn->data_blkaddr = NULL_ADDR; + set_data_blkaddr(dn); f2fs_update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 02daa3cf..c85a7a36 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -131,6 +131,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; write_data_page(page, dn, &fio); + set_data_blkaddr(dn); f2fs_update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f7be540d..33c8e8dc 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -403,6 +403,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); dn.data_blkaddr = dest; + set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); recovered++; } From 0aafdbd48b7742d22a6e5675e811ff061f39b28b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2015 19:24:59 +0800 Subject: [PATCH 211/321] f2fs: introduce __{find,grab}_extent_tree This patch introduces __{find,grab}_extent_tree for reusing by following patches. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 79 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 93a22b0e..1309c21a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -394,6 +394,49 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, et->cached_en = NULL; } +static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi, + nid_t ino) +{ + struct extent_tree *et; + + down_read(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) { + up_read(&sbi->extent_tree_lock); + return NULL; + } + atomic_inc(&et->refcount); + up_read(&sbi->extent_tree_lock); + + return et; +} + +static struct extent_tree *__grab_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + nid_t ino = inode->i_ino; + + down_write(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) { + et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); + f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); + memset(et, 0, sizeof(struct extent_tree)); + et->ino = ino; + et->root = RB_ROOT; + et->cached_en = NULL; + rwlock_init(&et->lock); + atomic_set(&et->refcount, 0); + et->count = 0; + sbi->total_ext_tree++; + } + atomic_inc(&et->refcount); + up_write(&sbi->extent_tree_lock); + + return et; +} + static struct extent_node *__lookup_extent_tree(struct extent_tree *et, unsigned int fofs) { @@ -538,14 +581,9 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, trace_f2fs_lookup_extent_tree_start(inode, pgofs); - down_read(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); - if (!et) { - up_read(&sbi->extent_tree_lock); + et = __find_extent_tree(sbi, inode->i_ino); + if (!et) return false; - } - atomic_inc(&et->refcount); - up_read(&sbi->extent_tree_lock); read_lock(&et->lock); en = __lookup_extent_tree(et, pgofs); @@ -570,7 +608,6 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - nid_t ino = inode->i_ino; struct extent_tree *et; struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; struct extent_node *den = NULL; @@ -579,22 +616,7 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, trace_f2fs_update_extent_tree(inode, fofs, blkaddr); - down_write(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, ino); - if (!et) { - et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); - f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); - memset(et, 0, sizeof(struct extent_tree)); - et->ino = ino; - et->root = RB_ROOT; - et->cached_en = NULL; - rwlock_init(&et->lock); - atomic_set(&et->refcount, 0); - et->count = 0; - sbi->total_ext_tree++; - } - atomic_inc(&et->refcount); - up_write(&sbi->extent_tree_lock); + et = __grab_extent_tree(inode); write_lock(&et->lock); @@ -732,14 +754,9 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (!test_opt(sbi, EXTENT_CACHE)) return; - down_read(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); - if (!et) { - up_read(&sbi->extent_tree_lock); + et = __find_extent_tree(sbi, inode->i_ino); + if (!et) goto out; - } - atomic_inc(&et->refcount); - up_read(&sbi->extent_tree_lock); /* free all extent info belong to this extent tree */ write_lock(&et->lock); From 61388e7a8ec305efb63ea323aea8e604f5bad0b1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2015 19:26:02 +0800 Subject: [PATCH 212/321] f2fs: initialize extent tree with on-disk extent info of inode With normal extent info cache, we records largest extent mapping between logical block and physical block into extent info, and we persist extent info in on-disk inode. When we enable extent tree cache, if extent info of on-disk inode is exist, and the extent is not a small fragmented mapping extent. We'd better to load the extent info into extent tree cache when inode is loaded. By this way we can have more chance to hit extent tree cache rather than taking more time to read dnode page for block address. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 43 +++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 4 +--- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1309c21a..48b5fd35 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -572,6 +572,39 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, return count - et->count; } +static void f2fs_init_extent_tree(struct inode *inode, + struct f2fs_extent *i_ext) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + struct extent_node *en; + struct extent_info ei; + + if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) + return; + + et = __grab_extent_tree(inode); + + write_lock(&et->lock); + if (et->count) + goto out; + + set_extent_info(&ei, le32_to_cpu(i_ext->fofs), + le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); + + en = __insert_extent_tree(sbi, et, &ei, NULL); + if (en) { + et->cached_en = en; + + spin_lock(&sbi->extent_lock); + list_add_tail(&en->list, &sbi->extent_list); + spin_unlock(&sbi->extent_lock); + } +out: + write_unlock(&et->lock); + atomic_dec(&et->refcount); +} + static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { @@ -782,6 +815,16 @@ void f2fs_destroy_extent_tree(struct inode *inode) return; } +void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext) +{ + if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + f2fs_init_extent_tree(inode, i_ext); + + write_lock(&F2FS_I(inode)->ext_lock); + get_extent_info(&F2FS_I(inode)->ext, *i_ext); + write_unlock(&F2FS_I(inode)->ext_lock); +} + static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e6ac6c3d..c13f04e6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1595,6 +1595,7 @@ int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); void f2fs_destroy_extent_tree(struct inode *); +void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); void f2fs_update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 7668bd12..2abdd60d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -136,9 +136,7 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - write_lock(&fi->ext_lock); - get_extent_info(&fi->ext, ri->i_ext); - write_unlock(&fi->ext_lock); + f2fs_init_extent_cache(inode, &ri->i_ext); get_inline_info(fi, ri); From 672812065c651f946b9a570cbffce2cddf2c24bb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2015 19:27:51 +0800 Subject: [PATCH 213/321] f2fs: preserve extent info for extent cache This patch tries to preserve last extent info in extent tree cache into on-disk inode, so this can help us to reuse the last extent info next time for performance. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/inode.c | 5 +++++ 3 files changed, 62 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 48b5fd35..9a219afc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -719,6 +719,55 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, atomic_dec(&et->refcount); } +void f2fs_preserve_extent_tree(struct inode *inode) +{ + struct extent_tree *et; + struct extent_info *ext = &F2FS_I(inode)->ext; + bool sync = false; + + if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + return; + + et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino); + if (!et) { + if (ext->len) { + ext->len = 0; + update_inode_page(inode); + } + return; + } + + read_lock(&et->lock); + if (et->count) { + struct extent_node *en; + + if (et->cached_en) { + en = et->cached_en; + } else { + struct rb_node *node = rb_first(&et->root); + + if (!node) + node = rb_last(&et->root); + en = rb_entry(node, struct extent_node, rb_node); + } + + if (__is_extent_same(ext, &en->ei)) + goto out; + + *ext = en->ei; + sync = true; + } else if (ext->len) { + ext->len = 0; + sync = true; + } +out: + read_unlock(&et->lock); + atomic_dec(&et->refcount); + + if (sync) + update_inode_page(inode); +} + void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) { struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c13f04e6..69a43a23 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -370,6 +370,13 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, ei->len = len; } +static inline bool __is_extent_same(struct extent_info *ei1, + struct extent_info *ei2) +{ + return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk && + ei1->len == ei2->len); +} + static inline bool __is_extent_mergeable(struct extent_info *back, struct extent_info *front) { @@ -1597,6 +1604,7 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); void f2fs_destroy_extent_tree(struct inode *); void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); void f2fs_update_extent_cache(struct dnode_of_data *); +void f2fs_preserve_extent_tree(struct inode *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2abdd60d..eed1284d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -342,7 +342,12 @@ void f2fs_evict_inode(struct inode *inode) no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); + + /* update extent info in inode */ + if (inode->i_nlink) + f2fs_preserve_extent_tree(inode); f2fs_destroy_extent_tree(inode); + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); From 6516907b0365a0307cbdf6c8f1ef429050e8afa2 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 24 Mar 2015 10:20:27 +0800 Subject: [PATCH 214/321] f2fs: enable inline data by default Enable inline_data feature by default since it brings us better performance and space utilization and now has already stable. Add another option noinline_data to disable it during mount. Suggested-by: Jaegeuk Kim Suggested-by: Chao Yu Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/super.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 2aec8f3c..26e78780 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -144,6 +144,8 @@ extent_cache Enable an extent cache based on rb-tree, it can cache as many as extent which map between contiguous logical address and physical address per inode, resulting in increasing the cache hit ratio. +noinline_data Disable the inline data feature, inline data feature is + enabled by default. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fc6857f3..45aa843f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -58,6 +58,7 @@ enum { Opt_nobarrier, Opt_fastboot, Opt_extent_cache, + Opt_noinline_data, Opt_err, }; @@ -80,6 +81,7 @@ static match_table_t f2fs_tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_fastboot, "fastboot"}, {Opt_extent_cache, "extent_cache"}, + {Opt_noinline_data, "noinline_data"}, {Opt_err, NULL}, }; @@ -372,6 +374,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_extent_cache: set_opt(sbi, EXTENT_CACHE); break; + case Opt_noinline_data: + clear_opt(sbi, INLINE_DATA); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -596,6 +601,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + else + seq_puts(seq, ",noinline_data"); if (test_opt(sbi, INLINE_DENTRY)) seq_puts(seq, ",inline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) @@ -991,6 +998,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->active_logs = NR_CURSEG_TYPE; set_opt(sbi, BG_GC); + set_opt(sbi, INLINE_DATA); #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); From cfa4c1a38926b3e11f8ef6f5bb6416922faade1e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2015 10:33:37 +0800 Subject: [PATCH 215/321] f2fs: preallocate fallocated blocks for direct IO Normally, due to DIO_SKIP_HOLES flag is set by default, blockdev_direct_IO in f2fs_direct_IO tries to skip DIO in holes when writing inside i_size, this makes us falling back to buffered IO which shows lower performance. So in commit 59b802e5a453 ("f2fs: allocate data blocks in advance for f2fs_direct_IO"), we improve perfromance by allocating data blocks in advance if we meet holes no matter in i_size or not, since with it we can avoid falling back to buffered IO. But we forget to consider for unwritten fallocated block in this commit. This patch tries to fix it for fallocate case, this helps to improve performance. Test result: Storage info: sandisk ultra 64G micro sd card. touch /mnt/f2fs/file truncate -s 67108864 /mnt/f2fs/file fallocate -o 0 -l 67108864 /mnt/f2fs/file time dd if=/dev/zero of=/mnt/f2fs/file bs=1M count=64 conv=notrunc oflag=direct Time before applying the patch: 67108864 bytes (67 MB) copied, 36.16 s, 1.9 MB/s real 0m36.162s user 0m0.000s sys 0m0.180s Time after applying the patch: 67108864 bytes (67 MB) copied, 27.7776 s, 2.4 MB/s real 0m27.780s user 0m0.000s sys 0m0.036s Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9a219afc..f7c10ca6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1126,16 +1126,23 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; + + dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + if (dn->data_blkaddr == NEW_ADDR) + goto alloc; + if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; +alloc: get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) seg = CURSEG_DIRECT_IO; - allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); + allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, + &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ set_data_blkaddr(dn); @@ -1175,7 +1182,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, block_t blkaddr; blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); - if (blkaddr == NULL_ADDR) { + if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { if (__allocate_data_block(&dn)) goto sync_out; allocated = true; From 9e35ce38f0cabfbe05562dadaa5792e017c7e977 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2015 10:36:15 +0800 Subject: [PATCH 216/321] f2fs: avoid NULL pointer dereference in f2fs_xattr_advise_get We will encounter oops by executing below command. getfattr -n system.advise /mnt/f2fs/file Killed message log: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] f2fs_xattr_advise_get+0x29/0x40 [f2fs] *pdpt = 00000000319b7001 *pde = 0000000000000000 Oops: 0002 [#1] SMP Modules linked in: f2fs(O) snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq joydev snd_seq_device snd_timer bnep snd rfcomm microcode bluetooth soundcore i2c_piix4 mac_hid serio_raw parport_pc ppdev lp parport binfmt_misc hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs] CPU: 3 PID: 3134 Comm: getfattr Tainted: G O 4.0.0-rc1 #6 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: f3a71b60 ti: f19a6000 task.ti: f19a6000 EIP: 0060:[] EFLAGS: 00010246 CPU: 3 EIP is at f2fs_xattr_advise_get+0x29/0x40 [f2fs] EAX: 00000000 EBX: f19a7e71 ECX: 00000000 EDX: f8b5b467 ESI: 00000000 EDI: f2008570 EBP: f19a7e14 ESP: f19a7e08 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: 00000000 CR3: 319b8000 CR4: 000007f0 Stack: f8b5a634 c0cbb580 00000000 f19a7e34 c1193850 00000000 00000007 f19a7e71 f19a7e64 c0cbb580 c1193810 f19a7e50 c1193c00 00000000 00000000 00000000 c0cbb580 00000000 f19a7f70 c1194097 00000000 00000000 00000000 74737973 Call Trace: [] generic_getxattr+0x40/0x50 [] ? xattr_resolve_name+0x80/0x80 [] vfs_getxattr+0x70/0xa0 [] getxattr+0x87/0x190 [] ? path_lookupat+0x57/0x5f0 [] ? putname+0x32/0x50 [] ? kmem_cache_alloc+0x2a/0x130 [] ? putname+0x32/0x50 [] ? putname+0x32/0x50 [] ? putname+0x32/0x50 [] ? user_path_at_empty+0x49/0x70 [] ? user_path_at+0x1f/0x30 [] path_getxattr+0x47/0x80 [] SyS_getxattr+0x27/0x30 [] sysenter_do_call+0x12/0x12 Code: 66 90 55 89 e5 57 56 53 66 66 66 66 90 8b 78 20 89 d3 ba 67 b4 b5 f8 89 d8 89 ce e8 42 7c 7b c8 85 c0 75 16 0f b6 87 44 01 00 00 <88> 06 b8 01 00 00 00 5b 5e 5f 5d c3 8d 76 00 b8 ea ff ff ff eb EIP: [] f2fs_xattr_advise_get+0x29/0x40 [f2fs] SS:ESP 0068:f19a7e08 CR2: 0000000000000000 ---[ end trace 860260654f1f416a ]--- The reason is that in getfattr there are two steps which is indicated by strace info: 1) try to lookup and get size of specified xattr. 2) get value of the extented attribute. strace info: getxattr("/mnt/f2fs/file", "system.advise", 0x0, 0) = 1 getxattr("/mnt/f2fs/file", "system.advise", "\x00", 256) = 1 For the first step, getfattr may pass a NULL pointer in @value and zero in @size as parameters for ->getxattr, but we access this @value pointer directly without checking whether the pointer is valid or not in f2fs_xattr_advise_get, so the oops occurs. This patch fixes this issue by verifying @value pointer before using. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 2f49a58a..2e49a6df 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -134,7 +134,8 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, if (strcmp(name, "") != 0) return -EINVAL; - *((char *)buffer) = F2FS_I(inode)->i_advise; + if (buffer) + *((char *)buffer) = F2FS_I(inode)->i_advise; return sizeof(char); } From 6ccac6c88327059050de197917c36acabd4277a4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2015 10:37:39 +0800 Subject: [PATCH 217/321] f2fs: persist system.advise into on-disk inode This patch fixes to dirty inode for persisting i_advise of f2fs inode info into on-disk inode if user sets system.advise through setxattr. Otherwise the new value will be lost. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 2e49a6df..0569d5f8 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -152,6 +152,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, return -EINVAL; F2FS_I(inode)->i_advise |= *(char *)value; + mark_inode_dirty(inode); return 0; } From 665487343eb0cf5368f4f50cad26115b7575846b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 24 Mar 2015 13:08:05 +0800 Subject: [PATCH 218/321] f2fs: limit b_size of mapped bh in f2fs_map_bh Map bh over max size which caller defined is not needed, limit it in f2fs_map_bh. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f7c10ca6..dbbab45a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -255,15 +255,13 @@ static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, struct extent_info *ei, struct buffer_head *bh_result) { unsigned int blkbits = sb->s_blocksize_bits; - size_t count; + size_t max_size = bh_result->b_size; + size_t mapped_size; clear_buffer_new(bh_result); map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); - count = ei->fofs + ei->len - pgofs; - if (count < (UINT_MAX >> blkbits)) - bh_result->b_size = (count << blkbits); - else - bh_result->b_size = UINT_MAX; + mapped_size = (ei->fofs + ei->len - pgofs) << blkbits; + bh_result->b_size = min(max_size, mapped_size); } static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, From 4fefc102e6f3ba5de16a719b20b1f29bf684e224 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Mar 2015 12:04:20 -0700 Subject: [PATCH 219/321] f2fs: fix sparse warnings This patch fixes the below warning. sparse warnings: (new ones prefixed by >>) >> fs/f2fs/inode.c:56:23: sparse: restricted __le32 degrades to integer >> fs/f2fs/inode.c:56:52: sparse: restricted __le32 degrades to integer Reported-by: kbuild test robot Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index eed1284d..42d2bf20 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -52,7 +52,9 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) static bool __written_first_block(struct f2fs_inode *ri) { - if (ri->i_addr[0] != NEW_ADDR && ri->i_addr[0] != NULL_ADDR) + block_t addr = le32_to_cpu(ri->i_addr[0]); + + if (addr != NEW_ADDR && addr != NULL_ADDR) return true; return false; } From 0b299089d0027fccfc59fc2b6190c455d51f7cc1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 26 Mar 2015 18:46:38 -0700 Subject: [PATCH 220/321] f2fs: fix mismatching lock and unlock pages for roll-forward recovery Previously, inode page is not correctly locked and unlocked in pair during the roll-forward recovery. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 48 ++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 33c8e8dc..e4f6291d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -258,6 +258,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, struct f2fs_summary_block *sum_node; struct f2fs_summary sum; struct page *sum_page, *node_page; + struct dnode_of_data tdn = *dn; nid_t ino, nid; struct inode *inode; unsigned int offset; @@ -285,17 +286,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, /* Use the locked dnode page and inode */ nid = le32_to_cpu(sum.nid); if (dn->inode->i_ino == nid) { - struct dnode_of_data tdn = *dn; tdn.nid = nid; + if (!dn->inode_page_locked) + lock_page(dn->inode_page); tdn.node_page = dn->inode_page; tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); - truncate_data_blocks_range(&tdn, 1); - return 0; + goto truncate_out; } else if (dn->nid == nid) { - struct dnode_of_data tdn = *dn; tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); - truncate_data_blocks_range(&tdn, 1); - return 0; + goto truncate_out; } /* Get the node page */ @@ -319,18 +318,33 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, bidx = start_bidx_of_node(offset, F2FS_I(inode)) + le16_to_cpu(sum.ofs_in_node); - if (ino != dn->inode->i_ino) { - truncate_hole(inode, bidx, bidx + 1); + /* + * if inode page is locked, unlock temporarily, but its reference + * count keeps alive. + */ + if (ino == dn->inode->i_ino && dn->inode_page_locked) + unlock_page(dn->inode_page); + + set_new_dnode(&tdn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) + goto out; + + if (tdn.data_blkaddr == blkaddr) + truncate_data_blocks_range(&tdn, 1); + + f2fs_put_dnode(&tdn); +out: + if (ino != dn->inode->i_ino) iput(inode); - } else { - struct dnode_of_data tdn; - set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); - if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) - return 0; - if (tdn.data_blkaddr != NULL_ADDR) - truncate_data_blocks_range(&tdn, 1); - f2fs_put_page(tdn.node_page, 1); - } + else if (dn->inode_page_locked) + lock_page(dn->inode_page); + return 0; + +truncate_out: + if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr) + truncate_data_blocks_range(&tdn, 1); + if (dn->inode->i_ino == nid && !dn->inode_page_locked) + unlock_page(dn->inode_page); return 0; } From e805761914158920baf42a7ab32438d20ad86a27 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Mar 2015 15:07:16 -0700 Subject: [PATCH 221/321] f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries If f2fs was corrupted with missing dot dentries, it needs to recover them after fsck.f2fs detection. The underlying precedure is: 1. The fsck.f2fs remains F2FS_INLINE_DOTS flag in directory inode, if it detects missing dot dentries. 2. When f2fs looks up the corrupted directory, it triggers f2fs_add_link with proper inode numbers and their dot and dotdot names. 3. Once f2fs recovers the directory without errors, it removes F2FS_INLINE_DOTS finally. Change-Id: Ia6156d2488a7f683b7c36d831ce7bcbf9094497c Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 50 ++++++++++++++++++++++------------------- fs/f2fs/f2fs.h | 22 +++++++++++++----- fs/f2fs/inline.c | 27 +++++++++++++--------- fs/f2fs/namei.c | 48 +++++++++++++++++++++++++++++++++++++++ fs/f2fs/recovery.c | 2 +- include/linux/f2fs_fs.h | 1 + 6 files changed, 110 insertions(+), 40 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c00418b1..b7ff44ef 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -64,9 +64,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, umode_t mode) { - umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } @@ -297,7 +296,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, lock_page(page); f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; @@ -343,14 +342,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, de->hash_code = 0; de->ino = cpu_to_le32(inode->i_ino); memcpy(d->filename[0], ".", 1); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); de = &d->dentry[1]; de->hash_code = 0; de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(d->filename[1], "..", 2); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); test_and_set_bit_le(0, (void *)d->bitmap); test_and_set_bit_le(1, (void *)d->bitmap); @@ -447,7 +446,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); @@ -462,7 +461,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } @@ -486,7 +485,7 @@ int room_for_filename(const void *bitmap, int slots, int max_slots) goto next; } -void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos) { @@ -498,8 +497,8 @@ void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, de->hash_code = name_hash; de->name_len = cpu_to_le16(name->len); memcpy(d->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); + de->ino = cpu_to_le32(ino); + set_de_type(de, mode); for (i = 0; i < slots; i++) test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); } @@ -509,7 +508,7 @@ void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, * f2fs_unlock_op(). */ int __f2fs_add_link(struct inode *dir, const struct qstr *name, - struct inode *inode) + struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; @@ -526,7 +525,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, int err = 0; if (f2fs_has_inline_dentry(dir)) { - err = f2fs_add_inline_entry(dir, name, inode); + err = f2fs_add_inline_entry(dir, name, inode, ino, mode); if (!err || err != -EAGAIN) return err; else @@ -576,26 +575,31 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; + if (inode) { + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } } make_dentry_ptr(&d, (void *)dentry_blk, 1); - f2fs_update_dentry(inode, &d, name, dentry_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos); set_page_dirty(dentry_page); - /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); + if (inode) { + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + } update_parent_metadata(dir, inode, current_depth); fail: - up_write(&F2FS_I(inode)->i_sem); + if (inode) + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode_page(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 69a43a23..83380e21 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1245,6 +1245,7 @@ enum { FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1281,6 +1282,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_DENTRY); if (ri->i_inline & F2FS_DATA_EXIST) set_inode_flag(fi, FI_DATA_EXIST); + if (ri->i_inline & F2FS_INLINE_DOTS) + set_inode_flag(fi, FI_INLINE_DOTS); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1296,6 +1299,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(fi, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; + if (is_inode_flag_set(fi, FI_INLINE_DOTS)) + ri->i_inline |= F2FS_INLINE_DOTS; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1341,6 +1346,11 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); } +static inline int f2fs_has_inline_dots(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1439,7 +1449,7 @@ struct dentry *f2fs_get_parent(struct dentry *child); * dir.c */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; -void set_de_type(struct f2fs_dir_entry *, struct inode *); +void set_de_type(struct f2fs_dir_entry *, umode_t); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, struct f2fs_dentry_ptr *, unsigned int); bool f2fs_fill_dentries(struct file *, void *, filldir_t, @@ -1458,9 +1468,10 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); -void f2fs_update_dentry(struct inode *, struct f2fs_dentry_ptr *, +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); -int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); +int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, + umode_t); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); @@ -1470,7 +1481,7 @@ bool f2fs_empty_dir(struct inode *); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, - inode); + inode, inode->i_ino, inode->i_mode); } /* @@ -1791,7 +1802,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, struct page **, unsigned int); struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); -int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, + nid_t, umode_t); void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c85a7a36..c5e35fcd 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -391,7 +391,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, } int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, - struct inode *inode) + struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; @@ -418,29 +418,34 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, goto out; } - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, ipage); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; + if (inode) { + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } } f2fs_wait_on_page_writeback(ipage, NODE); name_hash = f2fs_dentry_hash(name); make_dentry_ptr(&d, (void *)dentry_blk, 2); - f2fs_update_dentry(inode, &d, name, name_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); set_page_dirty(ipage); /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); + if (inode) { + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + } update_parent_metadata(dir, inode, 0); fail: - up_write(&F2FS_I(inode)->i_sem); + if (inode) + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode(dir, ipage); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4398cef1..52b87251 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -188,6 +188,44 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); } +static int __recover_dot_dentries(struct inode *dir, nid_t pino) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct qstr dot = {.len = 1, .name = "."}; + struct qstr dotdot = {.len = 2, .name = ".."}; + struct f2fs_dir_entry *de; + struct page *page; + int err = 0; + + f2fs_lock_op(sbi); + + de = f2fs_find_entry(dir, &dot, &page, 0); + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + + if (de) + goto dotdot; + + err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); + if (err) + goto out; +dotdot: + de = f2fs_find_entry(dir, &dotdot, &page, 0); + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + + if (!de) + err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); +out: + if (!err) { + clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS); + mark_inode_dirty(dir); + } + + f2fs_unlock_op(sbi); + return err; +} + static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -207,6 +245,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); + + if (f2fs_has_inline_dots(inode)) { + int err; + + err = __recover_dot_dentries(inode, dir->i_ino); + if (err) { + iget_failed(inode); + return ERR_PTR(err); + } + } } return d_splice_alias(inode, dentry); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e4f6291d..1ba96e92 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -115,7 +115,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) iput(einode); goto retry; } - err = __f2fs_add_link(dir, &name, inode); + err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); if (err) goto out_err; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 502f28cf..591f8c3e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -178,6 +178,7 @@ struct f2fs_extent { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) From 8da32c790cbeaf96354f7472ea99a96b93118372 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Mar 2015 15:23:45 -0700 Subject: [PATCH 222/321] f2fs: assign parent's i_mode for empty dir When assigning i_mode for dotdot, it needs to assign parent's i_mode. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b7ff44ef..937f5df5 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -349,7 +349,7 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(d->filename[1], "..", 2); - set_de_type(de, inode->i_mode); + set_de_type(de, parent->i_mode); test_and_set_bit_le(0, (void *)d->bitmap); test_and_set_bit_le(1, (void *)d->bitmap); From 769cfb7cec52565c49a1f9732c510d0bd7ec6437 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 31 Mar 2015 18:03:29 -0700 Subject: [PATCH 223/321] f2fs: do not increase link count during recovery If there are multiple fsynced dnodes having a dent flag, roll-forward routine sets FI_INC_LINK for their inode, and recovery_dentry increases its link count accordingly. That results in normal file having a link count as 2, so we can't unlink those files. This was added to handle several inode blocks having same inode number with different directory paths. But, current f2fs doesn't replay all of path changes and only recover its dentry for the last fsynced inode block. So, there is no reason to do this. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/recovery.c Change-Id: Ib1f5f1fce966bed74c57c67eafd38713ae066d94 --- fs/f2fs/recovery.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 1ba96e92..209ba0da 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -93,10 +93,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage) } retry: de = f2fs_find_entry(dir, &name, &page, 0); - if (de && inode->i_ino == le32_to_cpu(de->ino)) { - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out_unmap_put; - } + if (de) { einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { @@ -187,11 +186,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) goto next; entry = get_fsync_inode(head, ino_of_node(page)); - if (entry) { - if (IS_INODE(page) && is_dent_dnode(page)) - set_inode_flag(F2FS_I(entry->inode), - FI_INC_LINK); - } else { + if (!entry) { if (IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) From 4d5b83ece6269efee4a10f99cdcf29cfb40ac267 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Apr 2015 19:38:20 -0700 Subject: [PATCH 224/321] f2fs: do not recover wrong data index During the roll-forward recovery, if we found a new data index written fsync lastly, we need to recover new block address. But, if that address was corrupted, we should not recover that. Otherwise, f2fs gets kernel panic from: In check_index_in_prev_nodes(), sentry = get_seg_entry(sbi, segno); --------------------------> out-of-range segno. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 209ba0da..c8d969c9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -395,7 +395,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, src = datablock_addr(dn.node_page, dn.ofs_in_node); dest = datablock_addr(page, dn.ofs_in_node); - if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { + if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && + dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) { + if (src == NULL_ADDR) { err = reserve_new_block(&dn); /* We should not get -ENOSPC */ From b66dcebc014392f0fab59fb60b1127ab22d98661 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 8 Apr 2015 10:09:15 -0700 Subject: [PATCH 225/321] Revert "f2fs: assign parent's i_mode for empty dir" This reverts commit f8b6e35a758fb17c34c5c0aee7b2afea1eb6ee0c. --- fs/f2fs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 937f5df5..b7ff44ef 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -349,7 +349,7 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(d->filename[1], "..", 2); - set_de_type(de, parent->i_mode); + set_de_type(de, inode->i_mode); test_and_set_bit_le(0, (void *)d->bitmap); test_and_set_bit_le(1, (void *)d->bitmap); From cc500ec0a83d49b4687ba9e36f9dc8e8be0f01c2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 8 Apr 2015 10:09:19 -0700 Subject: [PATCH 226/321] Revert "f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries" This reverts commit 476786178aa918a7480a3e4644b60d91d553a44e. --- fs/f2fs/dir.c | 50 +++++++++++++++++++---------------------- fs/f2fs/f2fs.h | 22 +++++------------- fs/f2fs/inline.c | 27 +++++++++------------- fs/f2fs/namei.c | 48 --------------------------------------- fs/f2fs/recovery.c | 2 +- include/linux/f2fs_fs.h | 1 - 6 files changed, 40 insertions(+), 110 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b7ff44ef..c00418b1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -64,8 +64,9 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -void set_de_type(struct f2fs_dir_entry *de, umode_t mode) +void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) { + umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } @@ -296,7 +297,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, lock_page(page); f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode->i_mode); + set_de_type(de, inode); f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; @@ -342,14 +343,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, de->hash_code = 0; de->ino = cpu_to_le32(inode->i_ino); memcpy(d->filename[0], ".", 1); - set_de_type(de, inode->i_mode); + set_de_type(de, inode); de = &d->dentry[1]; de->hash_code = 0; de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(d->filename[1], "..", 2); - set_de_type(de, inode->i_mode); + set_de_type(de, inode); test_and_set_bit_le(0, (void *)d->bitmap); test_and_set_bit_le(1, (void *)d->bitmap); @@ -446,7 +447,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); @@ -461,7 +462,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } @@ -485,7 +486,7 @@ int room_for_filename(const void *bitmap, int slots, int max_slots) goto next; } -void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, +void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos) { @@ -497,8 +498,8 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, de->hash_code = name_hash; de->name_len = cpu_to_le16(name->len); memcpy(d->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(ino); - set_de_type(de, mode); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); for (i = 0; i < slots; i++) test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); } @@ -508,7 +509,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, * f2fs_unlock_op(). */ int __f2fs_add_link(struct inode *dir, const struct qstr *name, - struct inode *inode, nid_t ino, umode_t mode) + struct inode *inode) { unsigned int bit_pos; unsigned int level; @@ -525,7 +526,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, int err = 0; if (f2fs_has_inline_dentry(dir)) { - err = f2fs_add_inline_entry(dir, name, inode, ino, mode); + err = f2fs_add_inline_entry(dir, name, inode); if (!err || err != -EAGAIN) return err; else @@ -575,31 +576,26 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); - if (inode) { - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; - } + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; } make_dentry_ptr(&d, (void *)dentry_blk, 1); - f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos); + f2fs_update_dentry(inode, &d, name, dentry_hash, bit_pos); set_page_dirty(dentry_page); - if (inode) { - /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); - } + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); update_parent_metadata(dir, inode, current_depth); fail: - if (inode) - up_write(&F2FS_I(inode)->i_sem); + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode_page(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 83380e21..69a43a23 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1245,7 +1245,6 @@ enum { FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1282,8 +1281,6 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_DENTRY); if (ri->i_inline & F2FS_DATA_EXIST) set_inode_flag(fi, FI_DATA_EXIST); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_inode_flag(fi, FI_INLINE_DOTS); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1299,8 +1296,6 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(fi, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(fi, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1346,11 +1341,6 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS); -} - static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1449,7 +1439,7 @@ struct dentry *f2fs_get_parent(struct dentry *child); * dir.c */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; -void set_de_type(struct f2fs_dir_entry *, umode_t); +void set_de_type(struct f2fs_dir_entry *, struct inode *); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, struct f2fs_dentry_ptr *, unsigned int); bool f2fs_fill_dentries(struct file *, void *, filldir_t, @@ -1468,10 +1458,9 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); -void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, +void f2fs_update_dentry(struct inode *, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); -int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, - umode_t); +int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); @@ -1481,7 +1470,7 @@ bool f2fs_empty_dir(struct inode *); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, - inode, inode->i_ino, inode->i_mode); + inode); } /* @@ -1802,8 +1791,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, struct page **, unsigned int); struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); -int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, - nid_t, umode_t); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c5e35fcd..c85a7a36 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -391,7 +391,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, } int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, - struct inode *inode, nid_t ino, umode_t mode) + struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; @@ -418,34 +418,29 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, goto out; } - if (inode) { - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, ipage); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; - } + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; } f2fs_wait_on_page_writeback(ipage, NODE); name_hash = f2fs_dentry_hash(name); make_dentry_ptr(&d, (void *)dentry_blk, 2); - f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); + f2fs_update_dentry(inode, &d, name, name_hash, bit_pos); set_page_dirty(ipage); /* we don't need to mark_inode_dirty now */ - if (inode) { - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); - } + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); update_parent_metadata(dir, inode, 0); fail: - if (inode) - up_write(&F2FS_I(inode)->i_sem); + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode(dir, ipage); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 52b87251..4398cef1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -188,44 +188,6 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); } -static int __recover_dot_dentries(struct inode *dir, nid_t pino) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct qstr dot = {.len = 1, .name = "."}; - struct qstr dotdot = {.len = 2, .name = ".."}; - struct f2fs_dir_entry *de; - struct page *page; - int err = 0; - - f2fs_lock_op(sbi); - - de = f2fs_find_entry(dir, &dot, &page, 0); - f2fs_dentry_kunmap(dir, page); - f2fs_put_page(page, 0); - - if (de) - goto dotdot; - - err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); - if (err) - goto out; -dotdot: - de = f2fs_find_entry(dir, &dotdot, &page, 0); - f2fs_dentry_kunmap(dir, page); - f2fs_put_page(page, 0); - - if (!de) - err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); -out: - if (!err) { - clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS); - mark_inode_dirty(dir); - } - - f2fs_unlock_op(sbi); - return err; -} - static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -245,16 +207,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); - - if (f2fs_has_inline_dots(inode)) { - int err; - - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) { - iget_failed(inode); - return ERR_PTR(err); - } - } } return d_splice_alias(inode, dentry); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c8d969c9..19fb476f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -114,7 +114,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) iput(einode); goto retry; } - err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); + err = __f2fs_add_link(dir, &name, inode); if (err) goto out_err; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 591f8c3e..502f28cf 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -178,7 +178,6 @@ struct f2fs_extent { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ -#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) From 1ec6357fcc72f57ba381003997436e8809d2aa3c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Mar 2015 15:07:16 -0700 Subject: [PATCH 227/321] f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries If f2fs was corrupted with missing dot dentries, it needs to recover them after fsck.f2fs detection. The underlying precedure is: 1. The fsck.f2fs remains F2FS_INLINE_DOTS flag in directory inode, if it detects missing dot dentries. 2. When f2fs looks up the corrupted directory, it triggers f2fs_add_link with proper inode numbers and their dot and dotdot names. 3. Once f2fs recovers the directory without errors, it removes F2FS_INLINE_DOTS finally. [Jaegeuk Kim: fix build failure on ghost] Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 52 ++++++++++++++++++++++------------------- fs/f2fs/f2fs.h | 22 +++++++++++++---- fs/f2fs/inline.c | 29 +++++++++++++---------- fs/f2fs/namei.c | 48 +++++++++++++++++++++++++++++++++++++ fs/f2fs/recovery.c | 2 +- include/linux/f2fs_fs.h | 1 + 6 files changed, 112 insertions(+), 42 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c00418b1..a2a41df1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -64,9 +64,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, umode_t mode) { - umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } @@ -297,7 +296,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, lock_page(page); f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; @@ -343,14 +342,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, de->hash_code = 0; de->ino = cpu_to_le32(inode->i_ino); memcpy(d->filename[0], ".", 1); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); de = &d->dentry[1]; de->hash_code = 0; de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(d->filename[1], "..", 2); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); test_and_set_bit_le(0, (void *)d->bitmap); test_and_set_bit_le(1, (void *)d->bitmap); @@ -447,7 +446,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); @@ -462,7 +461,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } @@ -486,7 +485,7 @@ int room_for_filename(const void *bitmap, int slots, int max_slots) goto next; } -void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos) { @@ -498,8 +497,8 @@ void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, de->hash_code = name_hash; de->name_len = cpu_to_le16(name->len); memcpy(d->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); + de->ino = cpu_to_le32(ino); + set_de_type(de, mode); for (i = 0; i < slots; i++) test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); } @@ -509,7 +508,7 @@ void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, * f2fs_unlock_op(). */ int __f2fs_add_link(struct inode *dir, const struct qstr *name, - struct inode *inode) + struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; @@ -522,11 +521,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(namelen); - struct page *page; + struct page *page = NULL; int err = 0; if (f2fs_has_inline_dentry(dir)) { - err = f2fs_add_inline_entry(dir, name, inode); + err = f2fs_add_inline_entry(dir, name, inode, ino, mode); if (!err || err != -EAGAIN) return err; else @@ -576,26 +575,31 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; + if (inode) { + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } } make_dentry_ptr(&d, (void *)dentry_blk, 1); - f2fs_update_dentry(inode, &d, name, dentry_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos); set_page_dirty(dentry_page); - /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); + if (inode) { + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + } update_parent_metadata(dir, inode, current_depth); fail: - up_write(&F2FS_I(inode)->i_sem); + if (inode) + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode_page(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 69a43a23..83380e21 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1245,6 +1245,7 @@ enum { FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1281,6 +1282,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_DENTRY); if (ri->i_inline & F2FS_DATA_EXIST) set_inode_flag(fi, FI_DATA_EXIST); + if (ri->i_inline & F2FS_INLINE_DOTS) + set_inode_flag(fi, FI_INLINE_DOTS); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1296,6 +1299,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(fi, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; + if (is_inode_flag_set(fi, FI_INLINE_DOTS)) + ri->i_inline |= F2FS_INLINE_DOTS; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1341,6 +1346,11 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); } +static inline int f2fs_has_inline_dots(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1439,7 +1449,7 @@ struct dentry *f2fs_get_parent(struct dentry *child); * dir.c */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; -void set_de_type(struct f2fs_dir_entry *, struct inode *); +void set_de_type(struct f2fs_dir_entry *, umode_t); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, struct f2fs_dentry_ptr *, unsigned int); bool f2fs_fill_dentries(struct file *, void *, filldir_t, @@ -1458,9 +1468,10 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); -void f2fs_update_dentry(struct inode *, struct f2fs_dentry_ptr *, +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); -int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); +int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, + umode_t); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); @@ -1470,7 +1481,7 @@ bool f2fs_empty_dir(struct inode *); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, - inode); + inode, inode->i_ino, inode->i_mode); } /* @@ -1791,7 +1802,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, struct page **, unsigned int); struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); -int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, + nid_t, umode_t); void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c85a7a36..b972c038 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -391,7 +391,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, } int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, - struct inode *inode) + struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; @@ -401,7 +401,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, struct f2fs_inline_dentry *dentry_blk = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(namelen); - struct page *page; + struct page *page = NULL; int err = 0; ipage = get_node_page(sbi, dir->i_ino); @@ -418,29 +418,34 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, goto out; } - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, ipage); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; + if (inode) { + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } } f2fs_wait_on_page_writeback(ipage, NODE); name_hash = f2fs_dentry_hash(name); make_dentry_ptr(&d, (void *)dentry_blk, 2); - f2fs_update_dentry(inode, &d, name, name_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); set_page_dirty(ipage); /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); + if (inode) { + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + } update_parent_metadata(dir, inode, 0); fail: - up_write(&F2FS_I(inode)->i_sem); + if (inode) + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode(dir, ipage); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4398cef1..fee1f2ba 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -188,6 +188,44 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); } +static int __recover_dot_dentries(struct inode *dir, nid_t pino) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct qstr dot = {.len = 1, .name = "."}; + struct qstr dotdot = {.len = 2, .name = ".."}; + struct f2fs_dir_entry *de; + struct page *page; + int err = 0; + + f2fs_lock_op(sbi); + + de = f2fs_find_entry(dir, &dot, &page, 0); + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + } else { + err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); + if (err) + goto out; + } + + de = f2fs_find_entry(dir, &dotdot, &page, 0); + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + } else { + err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); + } +out: + if (!err) { + clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS); + mark_inode_dirty(dir); + } + + f2fs_unlock_op(sbi); + return err; +} + static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -207,6 +245,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); + + if (f2fs_has_inline_dots(inode)) { + int err; + + err = __recover_dot_dentries(inode, dir->i_ino); + if (err) { + iget_failed(inode); + return ERR_PTR(err); + } + } } return d_splice_alias(inode, dentry); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 19fb476f..c8d969c9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -114,7 +114,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) iput(einode); goto retry; } - err = __f2fs_add_link(dir, &name, inode); + err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); if (err) goto out_err; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 502f28cf..591f8c3e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -178,6 +178,7 @@ struct f2fs_extent { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) From af50d978fbac1bb310ef115378cfd4ecef991c43 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Mar 2015 15:23:45 -0700 Subject: [PATCH 228/321] f2fs: assign parent's i_mode for empty dir When assigning i_mode for dotdot, it needs to assign parent's i_mode. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a2a41df1..24ce33ea 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -349,7 +349,7 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(d->filename[1], "..", 2); - set_de_type(de, inode->i_mode); + set_de_type(de, parent->i_mode); test_and_set_bit_le(0, (void *)d->bitmap); test_and_set_bit_le(1, (void *)d->bitmap); From bede0675fd8575d38f5fd039473d526eb886265f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Apr 2015 13:37:53 -0700 Subject: [PATCH 229/321] - f2fs: flush symlink path to avoid broken symlink after POR This patch tries to avoid broken symlink case after POR in best effort. This results in performance regression. But, if f2fs has inline_data and the target path is under 3KB-sized long, the page would be stored in its inode_block, so that there would be no performance regression. Note that, if user wants to keep this file atomically, it needs to trigger dir->fsync. And, there is still a hole to produce broken symlink. Change-Id: I5a1d1a118d55a6cdc314c121964d349b288453d4 Signed-off-by: Jaegeuk Kim Reviewed-on: http://gerrit.mot.com/735482 Tested-by: Jira Key SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Reviewed-by: Jiang-Li Yuan Reviewed-by: Jaegeuk Kim Submit-Approved: Jira Key --- fs/f2fs/namei.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index fee1f2ba..97a0605d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -325,6 +325,17 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); unlock_new_inode(inode); + /* + * Let's flush symlink data in order to avoid broken symlink as much as + * possible. Nevertheless, fsyncing is the best way, but there is no + * way to get a file descriptor in order to flush that. + * + * Note that, it needs to do dir->fsync to make this recoverable. + * If the symlink path is stored into inline_data, there is no + * performance regression. + */ + filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1); + if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); return err; From bfa80ea101998420b0e374765d5932c974a4e0cf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Apr 2015 13:49:55 -0700 Subject: [PATCH 230/321] - f2fs: avoid abnormal behavior on broken symlink When f2fs_symlink was triggered and checkpoint was done before syncing its link path, f2fs can get broken symlink like "xxx -> \0\0\0". This incurs abnormal path_walk by VFS. Change-Id: I227859829a1bef0928710cc72d1e1585d90533ed Signed-off-by: Jaegeuk Kim Reviewed-on: http://gerrit.mot.com/735483 SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Reviewed-by: Christopher Fries Tested-by: Jira Key Reviewed-by: Jaegeuk Kim Submit-Approved: Jira Key --- fs/f2fs/namei.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 97a0605d..bfc71c17 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -296,6 +297,23 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) return err; } +static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct page *page; + + page = page_follow_link_light(dentry, nd); + if (IS_ERR(page)) + return page; + + /* this is broken symlink case */ + if (*nd_get_link(nd) == 0) { + kunmap(page); + page_cache_release(page); + return ERR_PTR(-ENOENT); + } + return page; +} + static int f2fs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { @@ -580,7 +598,7 @@ const struct inode_operations f2fs_dir_inode_operations = { const struct inode_operations f2fs_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = page_follow_link_light, + .follow_link = f2fs_follow_link, .put_link = page_put_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, From f8c203210d345f24249594265237f0264c1ccc3d Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 13 Apr 2015 21:48:06 +0900 Subject: [PATCH 231/321] f2fs: change 0 to false for bool type in the f2fs_fill_super function, variable "retry" is bool type i think that it should be set as false. Signed-off-by: Taehee Yoo Signed-off-by: Jaegeuk Kim Change-Id: I2edda37a4d21e1e7be773c253ab9c91b6d7fdcfc Reviewed-on: http://gerrit.mot.com/740756 SME-Granted: SME Approvals Granted Submit-Approved: Jira Key Tested-by: Jira Key SLTApproved: Slta Waiver Reviewed-by: Igor Kovalenko --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 45aa843f..160b8834 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1230,7 +1230,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* give only one another chance */ if (retry) { - retry = 0; + retry = false; shrink_dcache_sb(sb); goto try_onemore; } From 334998394bafb5076b21cfa1f5e6b8f53c47e427 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Apr 2015 17:03:53 -0700 Subject: [PATCH 232/321] f2fs: pass checkpoint reason on roll-forward recovery This patch adds CP_RECOVERY to remain recovery information for checkpoint. And, it makes sure writing checkpoint in this case. Signed-off-by: Jaegeuk Kim Change-Id: Id189c33fc7540482dba18db6e988bbaf977a17ed Reviewed-on: http://gerrit.mot.com/740757 Submit-Approved: Jira Key Tested-by: Jira Key SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Reviewed-by: Igor Kovalenko --- fs/f2fs/checkpoint.c | 6 +++++- fs/f2fs/f2fs.h | 1 + fs/f2fs/recovery.c | 2 +- include/trace/events/f2fs.h | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 30e3ef84..7b20cf06 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1052,7 +1052,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && - cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) + (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC)) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; @@ -1087,6 +1087,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); + + if (cpc->reason == CP_RECOVERY) + f2fs_msg(sbi->sb, KERN_NOTICE, + "checkpoint: version = %llx", ckpt_ver); out: mutex_unlock(&sbi->cp_mutex); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 83380e21..47472872 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -103,6 +103,7 @@ enum { CP_UMOUNT, CP_FASTBOOT, CP_SYNC, + CP_RECOVERY, CP_DISCARD, }; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c8d969c9..c8c4f2aa 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -564,7 +564,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) mutex_unlock(&sbi->cp_mutex); } else if (need_writecp) { struct cp_control cpc = { - .reason = CP_SYNC, + .reason = CP_RECOVERY, }; mutex_unlock(&sbi->cp_mutex); write_checkpoint(sbi, &cpc); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4ad19239..f622e782 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -78,6 +78,7 @@ { CP_UMOUNT, "Umount" }, \ { CP_FASTBOOT, "Fastboot" }, \ { CP_SYNC, "Sync" }, \ + { CP_RECOVERY, "Recovery" }, \ { CP_DISCARD, "Discard" }) struct victim_sel_policy; From e7b3fb76dc10489709dc0c794709e23dbc46b94a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 8 Jun 2015 17:51:10 -0700 Subject: [PATCH 233/321] f2fs: drop the volatile_write flag only When aborting volatile_writes, let's drop its flag and give up any further volatile_writes. Change-Id: If7e415824494e8cc99514d28074ecb31c718a47d Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 03059664..8ec6f937 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1052,11 +1052,9 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); } - if (f2fs_is_volatile_file(inode)) { + if (f2fs_is_volatile_file(inode)) clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - filemap_fdatawrite(inode->i_mapping); - set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - } + mnt_drop_write_file(filp); return ret; } From 962f93e6796e5947a61084c48809d8a83cd513d1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Apr 2015 10:40:54 -0700 Subject: [PATCH 234/321] Revert "f2fs: enhance multi-threads performance" This reports performance regression by Yuanhan Liu. The basic idea was to reduce one-point mutex, but it turns out this causes another contention like context swithes. https://lkml.org/lkml/2015/4/21/11 Until finishing the analysis on this issue, I'd like to revert this for a while. This reverts commit 78373b7319abdf15050af5b1632c4c8b8b398f33. --- fs/f2fs/data.c | 7 +++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 1 + 3 files changed, 9 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index dbbab45a..9730ee72 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + bool locked = false; int ret; long diff; @@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, DATA, wbc); + if (!S_ISDIR(inode->i_mode)) { + mutex_lock(&sbi->writepages); + locked = true; + } ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); + if (locked) + mutex_unlock(&sbi->writepages); f2fs_submit_merged_bio(sbi, DATA, WRITE); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 47472872..bb2df009 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -624,6 +624,7 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ + struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 160b8834..b2dd1b01 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1035,6 +1035,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->raw_super = raw_super; sbi->raw_super_buf = raw_super_buf; mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); clear_sbi_flag(sbi, SBI_POR_DOING); From 17b7deb05f62a84c2f761f932b159a8979a810a5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 18 Apr 2015 18:05:36 +0800 Subject: [PATCH 235/321] f2fs: use is_valid_blkaddr to verify blkaddr for readability Export is_valid_blkaddr() and use it to replace some codes for readability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +-- fs/f2fs/f2fs.h | 1 + fs/f2fs/recovery.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7b20cf06..dcfbe80d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -78,8 +78,7 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) return page; } -static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, - block_t blkaddr, int type) +bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { switch (type) { case META_NAT: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bb2df009..630fc6c8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1580,6 +1580,7 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c8c4f2aa..65821114 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -174,7 +174,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) while (1) { struct fsync_inode_entry *entry; - if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) + if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; page = get_meta_page(sbi, blkaddr); @@ -396,7 +396,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, dest = datablock_addr(page, dn.ofs_in_node); if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && - dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) { + is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { err = reserve_new_block(&dn); @@ -454,7 +454,7 @@ static int recover_data(struct f2fs_sb_info *sbi, while (1) { struct fsync_inode_entry *entry; - if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) + if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) break; ra_meta_pages_cond(sbi, blkaddr); From bcea0925368bdb4b65c2dd361524468614b82efa Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 21 Apr 2015 15:59:12 +0900 Subject: [PATCH 236/321] f2fs: add offset check routine before punch_hole() in f2fs_fallocate() In the punch_hole(), if offset bigger than inode size, it returns SUCCESS. Then f2fs_fallocate() will update time and dirty mark. In that case, inode has not been modified actually. So I have added offset check routine that prevent to call the punch_hole(). Signed-off-by: Taehee Yoo Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8ec6f937..f66da881 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -735,10 +735,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; - /* skip punching hole beyond i_size */ - if (offset >= inode->i_size) - return ret; - if (f2fs_has_inline_data(inode)) { ret = f2fs_convert_inline_inode(inode); if (ret) @@ -847,15 +843,19 @@ static long f2fs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); - long ret; + long ret = 0; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); - if (mode & FALLOC_FL_PUNCH_HOLE) + if (mode & FALLOC_FL_PUNCH_HOLE) { + if (offset >= inode->i_size) + goto out; + ret = punch_hole(inode, offset, len); + } else ret = expand_inode_data(inode, offset, len, mode); @@ -864,6 +864,7 @@ static long f2fs_fallocate(struct file *file, int mode, mark_inode_dirty(inode); } +out: mutex_unlock(&inode->i_mutex); trace_f2fs_fallocate(inode, mode, offset, len, ret); From 87c0b3a0bf7a9c493ab469c288813e5d239b3a69 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 18 Apr 2015 18:06:49 +0800 Subject: [PATCH 237/321] f2fs: make has_fsynced_inode static has_fsynced_inode() has no other caller out of node.c, make it static. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/node.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 630fc6c8..57d3162f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1506,7 +1506,6 @@ struct node_info; bool available_free_memory(struct f2fs_sb_info *, int); bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); -bool has_fsynced_inode(struct f2fs_sb_info *, nid_t); bool need_inode_block_update(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 32f4934c..a7170981 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -209,7 +209,7 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) return is_cp; } -bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) +static bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; From f08d65bbd7a2228bbc48e2e28970656c7176c5db Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 11:52:23 -0700 Subject: [PATCH 238/321] f2fs: add missing version info in superblock The mkfs.f2fs remains kernel version in superblock, but f2fs module has not added that so far. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 591f8c3e..8d345c24 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -50,6 +50,8 @@ #define MAX_ACTIVE_NODE_LOGS 8 #define MAX_ACTIVE_DATA_LOGS 8 +#define VERSION_LEN 256 + /* * For superblock */ @@ -86,6 +88,9 @@ struct f2fs_super_block { __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ __le32 cp_payload; + __u8 version[VERSION_LEN]; /* the kernel version */ + __u8 init_version[VERSION_LEN]; /* the initial kernel version */ + __u8 reserved[892]; /* valid reserved region */ } __packed; /* From 8f202ea0457746c5b935f2f6cbf93eae9455b043 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 13:44:41 -0700 Subject: [PATCH 239/321] f2fs: move existing definitions into f2fs.h This patch moves some inode-related definitions from node.h to f2fs.h to add new features. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 22 ++++++++++++++++++++++ fs/f2fs/node.h | 22 ---------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 57d3162f..c440859a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -319,6 +319,13 @@ struct extent_tree { #define FADVISE_COLD_BIT 0x01 #define FADVISE_LOST_PINO_BIT 0x02 +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) + #define DEF_DIR_LEVEL 0 struct f2fs_inode_info { @@ -1390,6 +1397,21 @@ static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) kunmap(page); } +static inline int is_file(struct inode *inode, int type) +{ + return F2FS_I(inode)->i_advise & type; +} + +static inline void set_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise |= type; +} + +static inline void clear_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise &= ~type; +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index c56026f1..7427e956 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -343,28 +343,6 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold node blocks in their node footer * - Mark cold data pages in page cache */ -static inline int is_file(struct inode *inode, int type) -{ - return F2FS_I(inode)->i_advise & type; -} - -static inline void set_file(struct inode *inode, int type) -{ - F2FS_I(inode)->i_advise |= type; -} - -static inline void clear_file(struct inode *inode, int type) -{ - F2FS_I(inode)->i_advise &= ~type; -} - -#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) -#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) -#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) -#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) -#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) -#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) - static inline int is_cold_data(struct page *page) { return PageChecked(page); From f0c3e6597ace55f09a2919ef286a13aaabb913b2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Apr 2015 15:10:36 -0700 Subject: [PATCH 240/321] f2fs: add feature facility in superblock This patch introduces a feature in superblock, which will indicate any new features for f2fs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 +++++++ include/linux/f2fs_fs.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c440859a..3759fc21 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -70,6 +70,13 @@ struct f2fs_mount_info { unsigned int opt; }; +#define F2FS_HAS_FEATURE(sb, mask) \ + ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_SET_FEATURE(sb, mask) \ + F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask) +#define F2FS_CLEAR_FEATURE(sb, mask) \ + F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask) + #define CRCPOLY_LE 0xedb88320 static inline __u32 f2fs_crc32(void *buf, size_t len) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8d345c24..d44e97f2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -90,7 +90,8 @@ struct f2fs_super_block { __le32 cp_payload; __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ - __u8 reserved[892]; /* valid reserved region */ + __le32 feature; /* defined features */ + __u8 reserved[888]; /* valid reserved region */ } __packed; /* From a175f43093afac37670c1afdac5c48ed07490b32 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Apr 2015 19:55:34 -0700 Subject: [PATCH 241/321] f2fs: add f2fs_map_blocks This patch introduces f2fs_map_blocks structure likewise ext4_map_blocks. Now, f2fs uses f2fs_map_blocks when handling get_block. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 72 +++++++++++++++++++++---------------- fs/f2fs/f2fs.h | 16 +++++++++ include/trace/events/f2fs.h | 28 +++++++-------- 3 files changed, 71 insertions(+), 45 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9730ee72..910a2f65 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -251,19 +251,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } -static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, - struct extent_info *ei, struct buffer_head *bh_result) -{ - unsigned int blkbits = sb->s_blocksize_bits; - size_t max_size = bh_result->b_size; - size_t mapped_size; - - clear_buffer_new(bh_result); - map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); - mapped_size = (ei->fofs + ei->len - pgofs) << blkbits; - bh_result->b_size = min(max_size, mapped_size); -} - static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { @@ -1208,18 +1195,18 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, } /* - * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. + * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with + * f2fs_map_blocks structure. * If original data blocks are allocated, then give them to blockdev. * Otherwise, * a. preallocate requested block addresses * b. do not use extent cache for better performance * c. give the block addresses to blockdev */ -static int __get_data_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create, bool fiemap) +static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, + int create, bool fiemap) { - unsigned int blkbits = inode->i_sb->s_blocksize_bits; - unsigned maxblocks = bh_result->b_size >> blkbits; + unsigned int maxblocks = map->m_len; struct dnode_of_data dn; int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; pgoff_t pgofs, end_offset; @@ -1227,11 +1214,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock, struct extent_info ei; bool allocated = false; - /* Get the page offset from the block offset(iblock) */ - pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); + map->m_len = 0; + map->m_flags = 0; + + /* it only supports block size == page size */ + pgofs = (pgoff_t)map->m_lblk; if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) { - f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result); + map->m_pblk = ei.blk + pgofs - ei.fofs; + map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); + map->m_flags = F2FS_MAP_MAPPED; goto out; } @@ -1250,21 +1242,21 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto put_out; if (dn.data_blkaddr != NULL_ADDR) { - clear_buffer_new(bh_result); - map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + map->m_flags = F2FS_MAP_MAPPED; + map->m_pblk = dn.data_blkaddr; } else if (create) { err = __allocate_data_block(&dn); if (err) goto put_out; allocated = true; - set_buffer_new(bh_result); - map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED; + map->m_pblk = dn.data_blkaddr; } else { goto put_out; } end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); - bh_result->b_size = (((size_t)1) << blkbits); + map->m_len = 1; dn.ofs_in_node++; pgofs++; @@ -1288,22 +1280,22 @@ static int __get_data_block(struct inode *inode, sector_t iblock, end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } - if (maxblocks > (bh_result->b_size >> blkbits)) { + if (maxblocks > map->m_len) { block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); if (blkaddr == NULL_ADDR && create) { err = __allocate_data_block(&dn); if (err) goto sync_out; allocated = true; - set_buffer_new(bh_result); + map->m_flags |= F2FS_MAP_NEW; blkaddr = dn.data_blkaddr; } /* Give more consecutive addresses for the readahead */ - if (blkaddr == (bh_result->b_blocknr + ofs)) { + if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) { ofs++; dn.ofs_in_node++; pgofs++; - bh_result->b_size += (((size_t)1) << blkbits); + map->m_len++; goto get_next; } } @@ -1316,10 +1308,28 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (create) f2fs_unlock_op(F2FS_I_SB(inode)); out: - trace_f2fs_get_data_block(inode, iblock, bh_result, err); + trace_f2fs_map_blocks(inode, map, err); return err; } +static int __get_data_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create, bool fiemap) +{ + struct f2fs_map_blocks map; + int ret; + + map.m_lblk = iblock; + map.m_len = bh->b_size >> inode->i_blkbits; + + ret = f2fs_map_blocks(inode, &map, create, fiemap); + if (!ret) { + map_bh(bh, inode->i_sb, map.m_pblk); + bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; + bh->b_size = map.m_len << inode->i_blkbits; + } + return ret; +} + static int get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3759fc21..dc56bd48 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -320,6 +320,22 @@ struct extent_tree { unsigned int count; /* # of extent node in rb-tree*/ }; +/* + * This structure is taken from ext4_map_blocks. + * + * Note that, however, f2fs uses NEW and MAPPED flags for f2fs_map_blocks(). + */ +#define F2FS_MAP_NEW (1 << BH_New) +#define F2FS_MAP_MAPPED (1 << BH_Mapped) +#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED) + +struct f2fs_map_blocks { + block_t m_pblk; + block_t m_lblk; + unsigned int m_len; + unsigned int m_flags; +}; + /* * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index f622e782..e5a03ef1 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -82,6 +82,7 @@ { CP_DISCARD, "Discard" }) struct victim_sel_policy; +struct f2fs_map_blocks; DECLARE_EVENT_CLASS(f2fs__inode, @@ -446,36 +447,35 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, __entry->err) ); -TRACE_EVENT(f2fs_get_data_block, - TP_PROTO(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int ret), +TRACE_EVENT(f2fs_map_blocks, + TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret), - TP_ARGS(inode, iblock, bh, ret), + TP_ARGS(inode, map, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(sector_t, iblock) - __field(sector_t, bh_start) - __field(size_t, bh_size) + __field(block_t, m_lblk) + __field(block_t, m_pblk) + __field(unsigned int, m_len) __field(int, ret) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->iblock = iblock; - __entry->bh_start = bh->b_blocknr; - __entry->bh_size = bh->b_size; + __entry->m_lblk = map->m_lblk; + __entry->m_pblk = map->m_pblk; + __entry->m_len = map->m_len; __entry->ret = ret; ), TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " - "start blkaddr = 0x%llx, len = 0x%llx bytes, err = %d", + "start blkaddr = 0x%llx, len = 0x%llx, err = %d", show_dev_ino(__entry), - (unsigned long long)__entry->iblock, - (unsigned long long)__entry->bh_start, - (unsigned long long)__entry->bh_size, + (unsigned long long)__entry->m_lblk, + (unsigned long long)__entry->m_pblk, + (unsigned long long)__entry->m_len, __entry->ret) ); From 9c9af69234cd54c6e7477bea0001d5c3f36c5344 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 18:49:51 -0700 Subject: [PATCH 242/321] f2fs: introduce f2fs_commit_super This patch introduces f2fs_commit_super to write updated superblock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dc56bd48..6edcfe2f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1534,6 +1534,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ +int f2fs_commit_super(struct f2fs_sb_info *); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b2dd1b01..85841687 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -966,6 +966,30 @@ static int read_raw_super_block(struct super_block *sb, return 0; } +int f2fs_commit_super(struct f2fs_sb_info *sbi) +{ + struct buffer_head *sbh = sbi->raw_super_buf; + sector_t block = sbh->b_blocknr; + int err; + + /* write back-up superblock first */ + sbh->b_blocknr = block ? 0 : 1; + mark_buffer_dirty(sbh); + err = sync_dirty_buffer(sbh); + + sbh->b_blocknr = block; + if (err) + goto out; + + /* write current valid superblock */ + mark_buffer_dirty(sbh); + err = sync_dirty_buffer(sbh); +out: + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); + return err; +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; From 68d008d6ace48869644ef2417ccf694e336bdfb0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Apr 2015 11:20:42 -0700 Subject: [PATCH 243/321] f2fs: expose f2fs_mpage_readpages This patch implements f2fs_mpage_readpages for further optimization on encryption support. The basic code was taken from fs/mpage.c, and changed to be simple by adjusting that block_size is equal to page_size in f2fs. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/data.c --- fs/f2fs/data.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 910a2f65..28b13e13 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "f2fs.h" #include "node.h" @@ -47,6 +49,30 @@ static void f2fs_read_end_io(struct bio *bio, int err) bio_put(bio); } +/* + * I/O completion handler for multipage BIOs. + * copied from fs/mpage.c + */ +static void mpage_end_io(struct bio *bio, int err) +{ + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + + if (!err) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); + } + + bio_put(bio); +} + static void f2fs_write_end_io(struct bio *bio, int err) { struct f2fs_sb_info *sbi = bio->bi_private; @@ -1349,6 +1375,133 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, start, len, get_data_block_fiemap); } +/* + * This function was originally taken from fs/mpage.c, and customized for f2fs. + * Major change was from block_size == page_size in f2fs by default. + */ +static int f2fs_mpage_readpages(struct address_space *mapping, + struct list_head *pages, struct page *page, + unsigned nr_pages) +{ + struct bio *bio = NULL; + unsigned page_idx; + sector_t last_block_in_bio = 0; + struct inode *inode = mapping->host; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + sector_t block_in_file; + sector_t last_block; + sector_t last_block_in_file; + sector_t block_nr; + struct block_device *bdev = inode->i_sb->s_bdev; + struct f2fs_map_blocks map; + + map.m_pblk = 0; + map.m_lblk = 0; + map.m_len = 0; + map.m_flags = 0; + + for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { + + prefetchw(&page->flags); + if (pages) { + page = list_entry(pages->prev, struct page, lru); + list_del(&page->lru); + if (add_to_page_cache_lru(page, mapping, + page->index, GFP_KERNEL)) + goto next_page; + } + + block_in_file = (sector_t)page->index; + last_block = block_in_file + nr_pages; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + blkbits; + if (last_block > last_block_in_file) + last_block = last_block_in_file; + + /* + * Map blocks using the previous result first. + */ + if ((map.m_flags & F2FS_MAP_MAPPED) && + block_in_file > map.m_lblk && + block_in_file < (map.m_lblk + map.m_len)) + goto got_it; + + /* + * Then do more f2fs_map_blocks() calls until we are + * done with this page. + */ + map.m_flags = 0; + + if (block_in_file < last_block) { + map.m_lblk = block_in_file; + map.m_len = last_block - block_in_file; + + if (f2fs_map_blocks(inode, &map, 0, false)) + goto set_error_page; + } +got_it: + if ((map.m_flags & F2FS_MAP_MAPPED)) { + block_nr = map.m_pblk + block_in_file - map.m_lblk; + SetPageMappedToDisk(page); + + if (!PageUptodate(page) && !cleancache_get_page(page)) { + SetPageUptodate(page); + goto confused; + } + } else { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + unlock_page(page); + goto next_page; + } + + /* + * This page will go to BIO. Do we need to send this + * BIO off first? + */ + if (bio && (last_block_in_bio != block_nr - 1)) { +submit_and_realloc: + submit_bio(READ, bio); + bio = NULL; + } + if (bio == NULL) { + bio = bio_alloc(GFP_KERNEL, + min_t(int, nr_pages, bio_get_nr_vecs(bdev))); + if (!bio) + goto set_error_page; + bio->bi_bdev = bdev; + bio->bi_sector = SECTOR_FROM_BLOCK(block_nr); + bio->bi_end_io = mpage_end_io; + bio->bi_private = NULL; + } + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + last_block_in_bio = block_nr; + goto next_page; +set_error_page: + SetPageError(page); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + unlock_page(page); + goto next_page; +confused: + if (bio) { + submit_bio(READ, bio); + bio = NULL; + } + unlock_page(page); +next_page: + if (pages) + page_cache_release(page); + } + BUG_ON(pages && !list_empty(pages)); + if (bio) + submit_bio(READ, bio); + return 0; +} + static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; @@ -1360,8 +1513,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = mpage_readpage(page, get_data_block); - + ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1); return ret; } @@ -1375,7 +1527,7 @@ static int f2fs_read_data_pages(struct file *file, if (f2fs_has_inline_data(inode)) return 0; - return mpage_readpages(mapping, pages, nr_pages, get_data_block); + return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } int do_write_data_page(struct page *page, struct f2fs_io_info *fio) From 0dc9dcf03047c3a76e335dc7084d2c74543a8166 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Apr 2015 11:40:27 -0700 Subject: [PATCH 244/321] f2fs: clean up f2fs_lookup This patch cleans up to avoid deep indentation. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/namei.c Change-Id: I5c9dec952cfd38553fc2cbbd1f23f003ec43a6f7 --- fs/f2fs/namei.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index bfc71c17..f94afd9f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -233,31 +233,32 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct f2fs_dir_entry *de; struct page *page; + nid_t ino; if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); - if (de) { - nid_t ino = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page); - f2fs_put_page(page, 0); + if (!de) + return d_splice_alias(inode, dentry); - inode = f2fs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + ino = le32_to_cpu(de->ino); + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); - if (f2fs_has_inline_dots(inode)) { - int err; + inode = f2fs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + if (f2fs_has_inline_dots(inode)) { + int err; - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) { - iget_failed(inode); - return ERR_PTR(err); - } + err = __recover_dot_dentries(inode, dir->i_ino); + if (err) { + iget_failed(inode); + return ERR_PTR(err); } } - return d_splice_alias(inode, dentry); } From 16b0b98ebd6272142b1dd82d5b246bd72c51226e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Apr 2015 10:27:21 -0700 Subject: [PATCH 245/321] f2fs: add f2fs_may_inline_{data, dentry} This patch adds f2fs_may_inline_data and f2fs_may_inline_dentry. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 2 +- fs/f2fs/inline.c | 13 ++++++++++++- fs/f2fs/namei.c | 4 ++-- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6edcfe2f..562913fb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1838,7 +1838,8 @@ extern struct kmem_cache *inode_entry_slab; /* * inline.c */ -bool f2fs_may_inline(struct inode *); +bool f2fs_may_inline_data(struct inode *); +bool f2fs_may_inline_dentry(struct inode *); void read_inline_data(struct page *, struct page *); bool truncate_inline_inode(struct page *, u64); int f2fs_read_inline_data(struct inode *, struct page *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f66da881..f372b9a8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -578,7 +578,7 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); /* we should check inline_data size */ - if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) { + if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { if (f2fs_convert_inline_inode(inode)) return; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index b972c038..89e2268e 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -13,7 +13,7 @@ #include "f2fs.h" -bool f2fs_may_inline(struct inode *inode) +bool f2fs_may_inline_data(struct inode *inode) { if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) return false; @@ -30,6 +30,17 @@ bool f2fs_may_inline(struct inode *inode) return true; } +bool f2fs_may_inline_dentry(struct inode *inode) +{ + if (!test_opt(F2FS_I_SB(inode), INLINE_DENTRY)) + return false; + + if (!S_ISDIR(inode->i_mode)) + return false; + + return true; +} + void read_inline_data(struct page *page, struct page *ipage) { void *src_addr, *dst_addr; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f94afd9f..0a67290a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -57,9 +57,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto out; } - if (f2fs_may_inline(inode)) + if (f2fs_may_inline_data(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) + if (f2fs_may_inline_dentry(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); trace_f2fs_new_inode(inode, 0); From 24e4084fcb2f32de8f84d90a34cdf7fff4040f8d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Apr 2015 14:38:15 -0700 Subject: [PATCH 246/321] f2fs: add sbi and page pointer in f2fs_io_info This patch adds f2fs_sb_info and page pointers in f2fs_io_info structure. With this change, we can reduce a lot of parameters for IO functions. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 9 +++++++-- fs/f2fs/data.c | 47 +++++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 18 ++++++++--------- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 4 +++- fs/f2fs/inline.c | 4 +++- fs/f2fs/node.c | 8 ++++++-- fs/f2fs/segment.c | 38 ++++++++++++++++++----------------- fs/f2fs/super.c | 2 +- fs/f2fs/trace.c | 6 +++--- fs/f2fs/trace.h | 4 ++-- 11 files changed, 83 insertions(+), 59 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dcfbe80d..0dba0d5f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -52,6 +52,7 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) struct address_space *mapping = META_MAPPING(sbi); struct page *page; struct f2fs_io_info fio = { + .sbi = sbi, .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO, .blk_addr = index, @@ -65,7 +66,9 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) if (PageUptodate(page)) goto out; - if (f2fs_submit_page_bio(sbi, page, &fio)) + fio.page = page; + + if (f2fs_submit_page_bio(&fio)) goto repeat; lock_page(page); @@ -118,6 +121,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type struct page *page; block_t blkno = start; struct f2fs_io_info fio = { + .sbi = sbi, .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; @@ -161,7 +165,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type continue; } - f2fs_submit_page_mbio(sbi, page, &fio); + fio.page = page; + f2fs_submit_page_mbio(&fio); f2fs_put_page(page, 0); } out: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 28b13e13..3a2c82e2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -159,16 +159,16 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, * Fill the locked page with data located in the block address. * Return unlocked page. */ -int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_io_info *fio) +int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; + struct page *page = fio->page; trace_f2fs_submit_page_bio(page, fio); - f2fs_trace_ios(page, fio, 0); + f2fs_trace_ios(fio, 0); /* Allocate a new bio */ - bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); + bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw)); if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { bio_put(bio); @@ -180,9 +180,9 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, return 0; } -void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_io_info *fio) +void f2fs_submit_page_mbio(struct f2fs_io_info *fio) { + struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; bool is_read = is_read_io(fio->rw); @@ -207,17 +207,17 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, io->fio = *fio; } - if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < + if (bio_add_page(io->bio, fio->page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { __submit_merged_bio(io); goto alloc_new; } io->last_block_in_bio = fio->blk_addr; - f2fs_trace_ios(page, fio, 0); + f2fs_trace_ios(fio, 0); up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio); + trace_f2fs_submit_page_mbio(fio->page, fio); } /* @@ -926,6 +926,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct extent_info ei; int err; struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, .rw = sync ? READ_SYNC : READA, }; @@ -972,7 +973,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) } fio.blk_addr = dn.data_blkaddr; - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); + fio.page = page; + err = f2fs_submit_page_bio(&fio); if (err) return ERR_PTR(err); @@ -999,6 +1001,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct extent_info ei; int err; struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, .rw = READ_SYNC, }; @@ -1042,7 +1045,8 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) } fio.blk_addr = dn.data_blkaddr; - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); + fio.page = page; + err = f2fs_submit_page_bio(&fio); if (err) return ERR_PTR(err); @@ -1093,11 +1097,13 @@ struct page *get_new_data_page(struct inode *inode, SetPageUptodate(page); } else { struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, .rw = READ_SYNC, .blk_addr = dn.data_blkaddr, + .page = page, }; - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); + err = f2fs_submit_page_bio(&fio); if (err) goto put_err; @@ -1530,8 +1536,9 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } -int do_write_data_page(struct page *page, struct f2fs_io_info *fio) +int do_write_data_page(struct f2fs_io_info *fio) { + struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; int err = 0; @@ -1558,11 +1565,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) if (unlikely(fio->blk_addr != NEW_ADDR && !is_cold_data(page) && need_inplace_update(inode))) { - rewrite_data_page(page, fio); + rewrite_data_page(fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); trace_f2fs_do_write_data_page(page, IPU); } else { - write_data_page(page, &dn, fio); + write_data_page(&dn, fio); set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); trace_f2fs_do_write_data_page(page, OPU); @@ -1587,8 +1594,10 @@ static int f2fs_write_data_page(struct page *page, bool need_balance_fs = false; int err = 0; struct f2fs_io_info fio = { + .sbi = sbi, .type = DATA, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .page = page, }; trace_f2fs_writepage(page, DATA); @@ -1618,7 +1627,7 @@ static int f2fs_write_data_page(struct page *page, if (S_ISDIR(inode->i_mode)) { if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; - err = do_write_data_page(page, &fio); + err = do_write_data_page(&fio); goto done; } @@ -1638,7 +1647,7 @@ static int f2fs_write_data_page(struct page *page, if (f2fs_has_inline_data(inode)) err = f2fs_write_inline_data(inode, page); if (err == -EAGAIN) - err = do_write_data_page(page, &fio); + err = do_write_data_page(&fio); f2fs_unlock_op(sbi); done: if (err && err != -ENOENT) @@ -1807,11 +1816,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { struct f2fs_io_info fio = { + .sbi = sbi, .type = DATA, .rw = READ_SYNC, .blk_addr = dn.data_blkaddr, + .page = page, }; - err = f2fs_submit_page_bio(sbi, page, &fio); + err = f2fs_submit_page_bio(&fio); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 562913fb..5f06d4fa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -600,9 +600,11 @@ enum page_type { }; struct f2fs_io_info { + struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ block_t blk_addr; /* block address to be written */ + struct page *page; /* page to be written */ }; #define is_read_io(rw) (((rw) & 1) == READ) @@ -1600,11 +1602,9 @@ void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); void write_meta_page(struct f2fs_sb_info *, struct page *); -void write_node_page(struct f2fs_sb_info *, struct page *, - unsigned int, struct f2fs_io_info *); -void write_data_page(struct page *, struct dnode_of_data *, - struct f2fs_io_info *); -void rewrite_data_page(struct page *, struct f2fs_io_info *); +void write_node_page(unsigned int, struct f2fs_io_info *); +void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); +void rewrite_data_page(struct f2fs_io_info *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void allocate_data_block(struct f2fs_sb_info *, struct page *, @@ -1652,10 +1652,8 @@ void destroy_checkpoint_caches(void); * data.c */ void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); -int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, - struct f2fs_io_info *); -void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, - struct f2fs_io_info *); +int f2fs_submit_page_bio(struct f2fs_io_info *); +void f2fs_submit_page_mbio(struct f2fs_io_info *); void set_data_blkaddr(struct dnode_of_data *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); @@ -1667,7 +1665,7 @@ void f2fs_preserve_extent_tree(struct inode *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int do_write_data_page(struct page *, struct f2fs_io_info *); +int do_write_data_page(struct f2fs_io_info *); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); void f2fs_invalidate_page(struct page *, unsigned long); void init_extent_cache_info(struct f2fs_sb_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f372b9a8..2fa76f94 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -270,7 +270,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); - f2fs_trace_ios(NULL, NULL, 1); + f2fs_trace_ios(NULL, 1); return ret; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ed58211f..72667a54 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -521,8 +521,10 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static void move_data_page(struct inode *inode, struct page *page, int gc_type) { struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, .rw = WRITE_SYNC, + .page = page, }; if (gc_type == BG_GC) { @@ -536,7 +538,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) if (clear_page_dirty_for_io(page)) inode_dec_dirty_pages(inode); set_cold_data(page); - do_write_data_page(page, &fio); + do_write_data_page(&fio); clear_cold_data(page); } out: diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 89e2268e..aa9321c4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -106,8 +106,10 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { void *src_addr, *dst_addr; struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(dn->inode), .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, + .page = page, }; int dirty, err; @@ -141,7 +143,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* write data page to try to make data consistent */ set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; - write_data_page(page, dn, &fio); + write_data_page(dn, &fio); set_data_blkaddr(dn); f2fs_update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a7170981..0ef4c4de 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -995,8 +995,10 @@ static int read_node_page(struct page *page, int rw) struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; struct f2fs_io_info fio = { + .sbi = sbi, .type = NODE, .rw = rw, + .page = page, }; get_node_info(sbi, page->index, &ni); @@ -1011,7 +1013,7 @@ static int read_node_page(struct page *page, int rw) return LOCKED_PAGE; fio.blk_addr = ni.blk_addr; - return f2fs_submit_page_bio(sbi, page, &fio); + return f2fs_submit_page_bio(&fio); } /* @@ -1295,8 +1297,10 @@ static int f2fs_write_node_page(struct page *page, nid_t nid; struct node_info ni; struct f2fs_io_info fio = { + .sbi = sbi, .type = NODE, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .page = page, }; trace_f2fs_writepage(page, NODE); @@ -1331,7 +1335,7 @@ static int f2fs_write_node_page(struct page *page, set_page_writeback(page); fio.blk_addr = ni.blk_addr; - write_node_page(sbi, page, nid, &fio); + write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5fbc72f4..b5ce9f58 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -275,6 +275,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) struct inmem_pages *cur, *tmp; bool submit_bio = false; struct f2fs_io_info fio = { + .sbi = sbi, .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; @@ -300,7 +301,8 @@ void commit_inmem_pages(struct inode *inode, bool abort) if (clear_page_dirty_for_io(cur->page)) inode_dec_dirty_pages(inode); trace_f2fs_commit_inmem_page(cur->page, INMEM); - do_write_data_page(cur->page, &fio); + fio.page = cur->page; + do_write_data_page(&fio); submit_bio = true; } f2fs_put_page(cur->page, 1); @@ -1265,56 +1267,56 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); } -static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_summary *sum, - struct f2fs_io_info *fio) +static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(page, fio->type); + int type = __get_segment_type(fio->page, fio->type); - allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type); + allocate_data_block(fio->sbi, fio->page, fio->blk_addr, + &fio->blk_addr, sum, type); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(sbi, page, fio); + f2fs_submit_page_mbio(fio); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_io_info fio = { + .sbi = sbi, .type = META, .rw = WRITE_SYNC | REQ_META | REQ_PRIO, .blk_addr = page->index, + .page = page, }; set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, &fio); + f2fs_submit_page_mbio(&fio); } -void write_node_page(struct f2fs_sb_info *sbi, struct page *page, - unsigned int nid, struct f2fs_io_info *fio) +void write_node_page(unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; + set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, &sum, fio); + do_write_page(&sum, fio); } -void write_data_page(struct page *page, struct dnode_of_data *dn, - struct f2fs_io_info *fio) +void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; struct node_info ni; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - do_write_page(sbi, page, &sum, fio); + do_write_page(&sum, fio); dn->data_blkaddr = fio->blk_addr; } -void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) +void rewrite_data_page(struct f2fs_io_info *fio) { - stat_inc_inplace_blocks(F2FS_P_SB(page)); - f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio); + stat_inc_inplace_blocks(fio->sbi); + f2fs_submit_page_mbio(fio); } void recover_data_page(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 85841687..138fa938 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -520,7 +520,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) } else { f2fs_balance_fs(sbi); } - f2fs_trace_ios(NULL, NULL, 1); + f2fs_trace_ios(NULL, 1); return 0; } diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 875aa817..145fb659 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -80,7 +80,7 @@ void f2fs_trace_pid(struct page *page) radix_tree_preload_end(); } -void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) +void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) { struct inode *inode; pid_t pid; @@ -91,8 +91,8 @@ void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) return; } - inode = page->mapping->host; - pid = page_private(page); + inode = fio->page->mapping->host; + pid = page_private(fio->page); major = MAJOR(inode->i_sb->s_dev); minor = MINOR(inode->i_sb->s_dev); diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 1041dbeb..67db24ac 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -33,12 +33,12 @@ struct last_io_info { }; extern void f2fs_trace_pid(struct page *); -extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); +extern void f2fs_trace_ios(struct f2fs_io_info *, int); extern void f2fs_build_trace_ios(void); extern void f2fs_destroy_trace_ios(void); #else #define f2fs_trace_pid(p) -#define f2fs_trace_ios(p, i, n) +#define f2fs_trace_ios(i, n) #define f2fs_build_trace_ios() #define f2fs_destroy_trace_ios() From 3b078683c92aaa7a6f0d708c029a8c1803eb93e9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 Apr 2015 14:34:30 -0700 Subject: [PATCH 247/321] f2fs: move get_page for gc victims This patch moves getting victim page into move_data_page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 72667a54..1bd11f01 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -518,14 +518,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return 1; } -static void move_data_page(struct inode *inode, struct page *page, int gc_type) +static void move_data_page(struct inode *inode, block_t bidx, int gc_type) { - struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), - .type = DATA, - .rw = WRITE_SYNC, - .page = page, - }; + struct page *page; + + page = get_lock_data_page(inode, bidx); + if (IS_ERR(page)) + return; if (gc_type == BG_GC) { if (PageWriteback(page)) @@ -533,6 +532,12 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) set_page_dirty(page); set_cold_data(page); } else { + struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), + .type = DATA, + .rw = WRITE_SYNC, + .page = page, + }; f2fs_wait_on_page_writeback(page, DATA); if (clear_page_dirty_for_io(page)) @@ -618,12 +623,9 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* phase 3 */ inode = find_gc_inode(gc_list, dni.ino); if (inode) { - start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); - data_page = get_lock_data_page(inode, - start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)) + + ofs_in_node; + move_data_page(inode, start_bidx, gc_type); stat_inc_data_blk_count(sbi, 1, gc_type); } } From 86effce08a42f043a6fbe1a1fdcfad7f6cf28f54 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 26 Apr 2015 00:15:29 -0700 Subject: [PATCH 248/321] f2fs: introduce dot and dotdot name check This patch adds an inline function to check dot and dotdot names. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 11 +++++++++++ fs/f2fs/hash.c | 3 +-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5f06d4fa..2c1affb4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1453,6 +1453,17 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) sbi->sb->s_flags |= MS_RDONLY; } +static inline bool is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + #define get_inode_mode(i) \ ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index a844fcfb..71b7206c 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -79,8 +79,7 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) const unsigned char *name = name_info->name; size_t len = name_info->len; - if ((len <= 2) && (name[0] == '.') && - (name[1] == '.' || name[1] == '\0')) + if (is_dot_dotdot(name_info)) return 0; /* Initialize the default seed for the hash checksum functions */ From dfe5898fa05bb5cc853e4e055e1fd4af55e5d800 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Apr 2015 11:18:42 -0700 Subject: [PATCH 249/321] f2fs: fix race on allocating and deallocating a dentry block There are two threads: f2fs_delete_entry() get_new_data_page() f2fs_reserve_block() dn.blkaddr = XXX lock_page(dentry_block) truncate_hole() dn.blkaddr = NULL unlock_page(dentry_block) lock_page(dentry_block) fill the block from XXX address add new dentries unlock_page(dentry_block) Later, f2fs_write_data_page() will truncate the dentry_block, since its block address is NULL. The reason for this was due to the wrong lock order. In this case, we should do f2fs_reserve_block() after locking its dentry block. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3a2c82e2..db042ba1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1077,20 +1077,22 @@ struct page *get_new_data_page(struct inode *inode, struct page *page; struct dnode_of_data dn; int err; +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, index); - if (err) + if (err) { + f2fs_put_page(page, 1); return ERR_PTR(err); -repeat: - page = grab_cache_page(mapping, index); - if (!page) { - err = -ENOMEM; - goto put_err; } + if (!ipage) + f2fs_put_dnode(&dn); if (PageUptodate(page)) - return page; + goto got_it; if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); @@ -1105,20 +1107,19 @@ struct page *get_new_data_page(struct inode *inode, }; err = f2fs_submit_page_bio(&fio); if (err) - goto put_err; + return ERR_PTR(err); lock_page(page); if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); - err = -EIO; - goto put_err; + return ERR_PTR(-EIO); } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } } - +got_it: if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); @@ -1126,10 +1127,6 @@ struct page *get_new_data_page(struct inode *inode, set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); } return page; - -put_err: - f2fs_put_dnode(&dn); - return ERR_PTR(err); } static int __allocate_data_block(struct dnode_of_data *dn) From bd0318401ff5d58a74f566a1a4b25eef171919ab Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Apr 2015 18:31:19 -0700 Subject: [PATCH 250/321] f2fs: add need_dentry_mark This patch introduces need_dentry_mark() to clean up and avoid redundant node locks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 35 +++++++++++++++++------------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c1affb4..55bd1f19 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1564,6 +1564,7 @@ struct dnode_of_data; struct node_info; bool available_free_memory(struct f2fs_sb_info *, int); +int need_dentry_mark(struct f2fs_sb_info *, nid_t); bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); bool need_inode_block_update(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0ef4c4de..b775813c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -195,32 +195,35 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - bool is_cp = true; + bool need = false; down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); - if (e && !get_nat_flag(e, IS_CHECKPOINTED)) - is_cp = false; + if (e) { + if (!get_nat_flag(e, IS_CHECKPOINTED) && + !get_nat_flag(e, HAS_FSYNCED_INODE)) + need = true; + } up_read(&nm_i->nat_tree_lock); - return is_cp; + return need; } -static bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - bool fsynced = false; + bool is_cp = true; down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ino); - if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) - fsynced = true; + e = __lookup_nat_cache(nm_i, nid); + if (e && !get_nat_flag(e, IS_CHECKPOINTED)) + is_cp = false; up_read(&nm_i->nat_tree_lock); - return fsynced; + return is_cp; } bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) @@ -1208,13 +1211,9 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, /* called by fsync() */ if (ino && IS_DNODE(page)) { set_fsync_mark(page, 1); - if (IS_INODE(page)) { - if (!is_checkpointed_node(sbi, ino) && - !has_fsynced_inode(sbi, ino)) - set_dentry_mark(page, 1); - else - set_dentry_mark(page, 0); - } + if (IS_INODE(page)) + set_dentry_mark(page, + need_dentry_mark(sbi, ino)); nwritten++; } else { set_fsync_mark(page, 0); From b45dd3f0ab18610acc515ed5004028e06ec098db Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Apr 2015 18:58:22 -0700 Subject: [PATCH 251/321] f2fs: fix counting the number of inline_data inodes This patch fixes to count the missing symlink case. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/namei.c Change-Id: If7dd30900da0b507c6190b996c7f622d0ede1676 --- fs/f2fs/namei.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0a67290a..d28e1ff9 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -62,6 +62,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_may_inline_dentry(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -137,7 +140,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); @@ -387,7 +389,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); - stat_inc_inline_dir(inode); alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); From b73c8b664fce1a4ff4330c29eefba9bc4527f418 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Apr 2015 17:00:33 -0700 Subject: [PATCH 252/321] f2fs: split find_data_page according to specific purposes This patch splits find_data_page as follows. 1. f2fs_gc - use get_read_data_page() with read only 2. find_in_level - use find_data_page without locked page 3. truncate_partial_page - In the case cache_only mode, just drop cached page. - Ohterwise, use get_lock_data_page() and guarantee to truncate Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 127 +++++++++++++++++++------------------------------ fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 3 +- fs/f2fs/file.c | 26 +++++----- fs/f2fs/gc.c | 5 +- 5 files changed, 68 insertions(+), 95 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index db042ba1..d489d845 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -918,7 +918,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) sync_inode_page(dn); } -struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) +struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; @@ -928,84 +928,9 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, - .rw = sync ? READ_SYNC : READA, + .rw = rw, }; - /* - * If sync is false, it needs to check its block allocation. - * This is need and triggered by two flows: - * gc and truncate_partial_data_page. - */ - if (!sync) - goto search; - - page = find_get_page(mapping, index); - if (page && PageUptodate(page)) - return page; - f2fs_put_page(page, 0); -search: - if (f2fs_lookup_extent_cache(inode, index, &ei)) { - dn.data_blkaddr = ei.blk + index - ei.fofs; - goto got_it; - } - - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) - return ERR_PTR(err); - f2fs_put_dnode(&dn); - - if (dn.data_blkaddr == NULL_ADDR) - return ERR_PTR(-ENOENT); - - /* By fallocate(), there is no cached page, but with NEW_ADDR */ - if (unlikely(dn.data_blkaddr == NEW_ADDR)) - return ERR_PTR(-EINVAL); - -got_it: - page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); - - if (PageUptodate(page)) { - unlock_page(page); - return page; - } - - fio.blk_addr = dn.data_blkaddr; - fio.page = page; - err = f2fs_submit_page_bio(&fio); - if (err) - return ERR_PTR(err); - - if (sync) { - wait_on_page_locked(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 0); - return ERR_PTR(-EIO); - } - } - return page; -} - -/* - * If it tries to access a hole, return an error. - * Because, the callers, functions in dir.c and GC, should be able to know - * whether this page exists or not. - */ -struct page *get_lock_data_page(struct inode *inode, pgoff_t index) -{ - struct address_space *mapping = inode->i_mapping; - struct dnode_of_data dn; - struct page *page; - struct extent_info ei; - int err; - struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), - .type = DATA, - .rw = READ_SYNC, - }; -repeat: page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -1027,10 +952,11 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) f2fs_put_page(page, 1); return ERR_PTR(-ENOENT); } - got_it: - if (PageUptodate(page)) + if (PageUptodate(page)) { + unlock_page(page); return page; + } /* * A new dentry page is allocated but not able to be written, since its @@ -1041,6 +967,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); + unlock_page(page); return page; } @@ -1049,7 +976,49 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) err = f2fs_submit_page_bio(&fio); if (err) return ERR_PTR(err); + return page; +} + +struct page *find_data_page(struct inode *inode, pgoff_t index) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; + + page = find_get_page(mapping, index); + if (page && PageUptodate(page)) + return page; + f2fs_put_page(page, 0); + + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) + return page; + + if (PageUptodate(page)) + return page; + + wait_on_page_locked(page); + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); + } + return page; +} + +/* + * If it tries to access a hole, return an error. + * Because, the callers, functions in dir.c and GC, should be able to know + * whether this page exists or not. + */ +struct page *get_lock_data_page(struct inode *inode, pgoff_t index) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; +repeat: + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) + return page; + /* wait for read completion */ lock_page(page); if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 24ce33ea..2f58e7c5 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -190,7 +190,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, for (; bidx < end_block; bidx++) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = find_data_page(dir, bidx, true); + dentry_page = find_data_page(dir, bidx); if (IS_ERR(dentry_page)) { room = true; continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 55bd1f19..dc80a6f7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1674,7 +1674,8 @@ void f2fs_destroy_extent_tree(struct inode *); void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); void f2fs_update_extent_cache(struct dnode_of_data *); void f2fs_preserve_extent_tree(struct inode *); -struct page *find_data_page(struct inode *, pgoff_t, bool); +struct page *get_read_data_page(struct inode *, pgoff_t, int); +struct page *find_data_page(struct inode *, pgoff_t); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct f2fs_io_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2fa76f94..b95a25e6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -479,28 +479,32 @@ void truncate_data_blocks(struct dnode_of_data *dn) } static int truncate_partial_data_page(struct inode *inode, u64 from, - bool force) + bool cache_only) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); + pgoff_t index = from >> PAGE_CACHE_SHIFT; + struct address_space *mapping = inode->i_mapping; struct page *page; - if (!offset && !force) + if (!offset && !cache_only) return 0; - page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force); - if (IS_ERR(page)) + if (cache_only) { + page = grab_cache_page(mapping, index); + if (page && PageUptodate(page)) + goto truncate_out; + f2fs_put_page(page, 1); return 0; + } - lock_page(page); - if (unlikely(!PageUptodate(page) || - page->mapping != inode->i_mapping)) - goto out; - + page = get_lock_data_page(inode, index); + if (IS_ERR(page)) + return 0; +truncate_out: f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); - if (!force) + if (!cache_only) set_page_dirty(page); -out: f2fs_put_page(page, 1); return 0; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1bd11f01..2e2afebd 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -607,9 +607,8 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, continue; start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); - - data_page = find_data_page(inode, - start_bidx + ofs_in_node, false); + data_page = get_read_data_page(inode, + start_bidx + ofs_in_node, READA); if (IS_ERR(data_page)) { iput(inode); continue; From f870bc9535059d9abca8730876c03b14f42e551f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 1 May 2015 11:08:59 -0700 Subject: [PATCH 253/321] f2fs: revmove spin_lock for write_orphan_inodes This patch removes spin_lock, since this is covered by f2fs_lock_op already. And, we should avoid to use page operations inside spin_lock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0dba0d5f..7b3865c1 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -515,7 +515,12 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&im->ino_lock); + + /* + * we don't need to do spin_lock(&im->ino_lock) here, since all the + * orphan inode operations are covered under f2fs_lock_op(). + * And, spin_lock should be avoided due to page operations below. + */ head = &im->ino_list; /* loop for each orphan inode entry and write them in Jornal block */ @@ -555,8 +560,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) set_page_dirty(page); f2fs_put_page(page, 1); } - - spin_unlock(&im->ino_lock); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, From 5656618f19178d4e850b8f5bc921dcade293c6ad Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Apr 2015 22:37:50 -0700 Subject: [PATCH 254/321] f2fs: introduce discard_map for f2fs_trim_fs This patch adds a bitmap for discard issues from f2fs_trim_fs. There-in rule is to issue discard commands only for invalidated blocks after mount. Once mount is done, f2fs_trim_fs trims out whole invalid area. After ehn, it will not issue and discrads redundantly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 21 +++++++++++++ fs/f2fs/segment.c | 71 +++++++++++++++++++++++++++----------------- fs/f2fs/segment.h | 1 + 5 files changed, 68 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7b3865c1..5b50a095 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1059,7 +1059,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && - (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC)) + (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || + (cpc->reason == CP_DISCARD && !sbi->discard_blks))) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index f5388f37..f50acbc5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -143,7 +143,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct sit_info); si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); - si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += 3 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dc80a6f7..707c4ceb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -117,6 +117,8 @@ enum { #define DEF_BATCHED_TRIM_SECTIONS 32 #define BATCHED_TRIM_SEGMENTS(sbi) \ (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) +#define BATCHED_TRIM_BLOCKS(sbi) \ + (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) struct cp_control { int reason; @@ -697,6 +699,7 @@ struct f2fs_sb_info { block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ u32 s_next_generation; /* for NFS support */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ @@ -1224,6 +1227,24 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) return mask & *addr; } +static inline void f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr |= mask; +} + +static inline void f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr &= ~mask; +} + static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) { int mask; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b5ce9f58..a5e31c8a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -527,6 +527,17 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, { sector_t start = SECTOR_FROM_BLOCK(blkstart); sector_t len = SECTOR_FROM_BLOCK(blklen); + struct seg_entry *se; + unsigned int offset; + block_t i; + + for (i = blkstart; i < blkstart + blklen; i++) { + se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); + offset = GET_BLKOFF_FROM_SEG0(sbi, i); + + if (!f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + } trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); } @@ -542,7 +553,8 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) } static void __add_discard_entry(struct f2fs_sb_info *sbi, - struct cp_control *cpc, unsigned int start, unsigned int end) + struct cp_control *cpc, struct seg_entry *se, + unsigned int start, unsigned int end) { struct list_head *head = &SM_I(sbi)->discard_list; struct discard_entry *new, *last; @@ -573,41 +585,24 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long *discard_map = (unsigned long *)se->discard_map; unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); int i; - if (!force && (!test_opt(sbi, DISCARD) || - SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)) + if (se->valid_blocks == max_blocks) return; - if (force && !se->valid_blocks) { - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - /* - * if this segment is registered in the prefree list, then - * we should skip adding a discard candidate, and let the - * checkpoint do that later. - */ - mutex_lock(&dirty_i->seglist_lock); - if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { - mutex_unlock(&dirty_i->seglist_lock); - cpc->trimmed += sbi->blocks_per_seg; - return; - } - mutex_unlock(&dirty_i->seglist_lock); - - __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg); + if (!force) { + if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) return; } - /* zero block will be discarded through the prefree list */ - if (!se->valid_blocks || se->valid_blocks == max_blocks) - return; - /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = force ? ~ckpt_map[i] : + dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] : (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { @@ -620,7 +615,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (force && end - start < cpc->trim_minlen) continue; - __add_discard_entry(sbi, cpc, start, end); + __add_discard_entry(sbi, cpc, se, start, end); } } @@ -734,9 +729,13 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (del > 0) { if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); + if (!f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; } else { if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); + if (f2fs_test_and_clear_bit(offset, se->discard_map)) + sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks += del; @@ -1139,7 +1138,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) /* do checkpoint to issue discard commands safely */ for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { cpc.trim_start = start_segno; - cpc.trim_end = min_t(unsigned int, rounddown(start_segno + + + if (sbi->discard_blks == 0) + break; + else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) + cpc.trim_end = end_segno; + else + cpc.trim_end = min_t(unsigned int, + rounddown(start_segno + BATCHED_TRIM_SEGMENTS(sbi), sbi->segs_per_sec) - 1, end_segno); @@ -1918,8 +1924,11 @@ static int build_sit_info(struct f2fs_sb_info *sbi) = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - if (!sit_i->sentries[start].cur_valid_map - || !sit_i->sentries[start].ckpt_valid_map) + sit_i->sentries[start].discard_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map || + !sit_i->sentries[start].ckpt_valid_map || + !sit_i->sentries[start].discard_map) return -ENOMEM; } @@ -2057,6 +2066,11 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) got_it: check_block_count(sbi, start, &sit); seg_info_from_raw_sit(se, &sit); + + /* build discard map only one time */ + memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks; + if (sbi->segs_per_sec > 1) { struct sec_entry *e = get_sec_entry(sbi, start); e->valid_blocks += se->valid_blocks; @@ -2306,6 +2320,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) for (start = 0; start < MAIN_SEGS(sbi); start++) { kfree(sit_i->sentries[start].cur_valid_map); kfree(sit_i->sentries[start].ckpt_valid_map); + kfree(sit_i->sentries[start].discard_map); } } kfree(sit_i->tmp_map); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 85d7fa75..84963577 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -163,6 +163,7 @@ struct seg_entry { */ unsigned short ckpt_valid_blocks; unsigned char *ckpt_valid_map; + unsigned char *discard_map; unsigned char type; /* segment type like CURSEG_XXX_TYPE */ unsigned long long mtime; /* modification time of the segment */ }; From 765eba861a8a15f9cbc211f2b3e31cf6b44f11d7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Apr 2015 22:50:06 -0700 Subject: [PATCH 255/321] f2fs: issue discard with finally produced len and minlen This patch determines to issue discard commands by comparing given minlen and the length of produced final candidates. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 14 ++++++-------- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5b50a095..fd7a21df 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1044,7 +1044,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (unlikely(f2fs_cp_error(sbi))) return; - clear_prefree_segments(sbi); + clear_prefree_segments(sbi, cpc); clear_sbi_flag(sbi, SBI_IS_DIRTY); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 707c4ceb..e53f1305 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1627,7 +1627,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *); void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); -void clear_prefree_segments(struct f2fs_sb_info *); +void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); void release_discard_addrs(struct f2fs_sb_info *); void discard_next_dnode(struct f2fs_sb_info *, block_t); int npages_for_summary_flush(struct f2fs_sb_info *, bool); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a5e31c8a..bd3e4deb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -611,10 +611,6 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); - - if (force && end - start < cpc->trim_minlen) - continue; - __add_discard_entry(sbi, cpc, se, start, end); } } @@ -645,7 +641,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -void clear_prefree_segments(struct f2fs_sb_info *sbi) +void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct list_head *head = &(SM_I(sbi)->discard_list); struct discard_entry *entry, *this; @@ -678,7 +674,10 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { + if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) + goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); +skip: list_del(&entry->list); SM_I(sbi)->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); @@ -1120,8 +1119,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) unsigned int start_segno, end_segno; struct cp_control cpc; - if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) || - range->len < sbi->blocksize) + if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; cpc.trimmed = 0; @@ -1133,7 +1131,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); cpc.reason = CP_DISCARD; - cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen); + cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); /* do checkpoint to issue discard commands safely */ for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { From 0601c22bed2211def702168e8e82dc90e11bc436 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Apr 2015 18:34:41 +0800 Subject: [PATCH 256/321] f2fs: remove unneeded f2fs_make_empty declaration Remove f2fs_make_empty() declaration, since the main body of this function is move into do_make_empty_dir() and the function is obsolete now. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e53f1305..a88714e9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1556,7 +1556,6 @@ int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); -int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) From 19e067bf9838e4470c55381d7f60709fc33ed1dc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Apr 2015 18:35:50 +0800 Subject: [PATCH 257/321] f2fs: do not re-lookup nat cache with same nid In set_node_addr, we try to lookup cached nat entry of inode and then set flag in it. But previously in this function, we have already grabbed nat entry with current node id, if the node id is the same as the one of inode, we do not need to lookup it in cache again. So this patch adds condition judgment for reducing unneeded lookup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b775813c..bb74f58e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -315,7 +315,8 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, __set_nat_cache_dirty(nm_i, e); /* update fsync_mark if its inode nat entry is still alive */ - e = __lookup_nat_cache(nm_i, ni->ino); + if (ni->nid != ni->ino) + e = __lookup_nat_cache(nm_i, ni->ino); if (e) { if (fsync_done && ni->nid == ni->ino) set_nat_flag(e, HAS_FSYNCED_INODE, true); From df7e7df08191038e425db7c1aa632cb710fb5492 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 May 2015 13:08:06 +0800 Subject: [PATCH 258/321] f2fs: introduce f2fs_replace_block() for reuse Introduce a generic function replace_block base on recover_data_page, and export it. So with it we can operate file's meta data which is in CP/SSA area when we invoke fallocate with FALLOC_FL_COLLAPSE_RANGE flag. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 31 ++++++++++++++++++++++++------- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a88714e9..112d6cd8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1637,8 +1637,8 @@ void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(unsigned int, struct f2fs_io_info *); void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); void rewrite_data_page(struct f2fs_io_info *); -void recover_data_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, block_t, block_t); +void f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *, + block_t, block_t, bool); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 65821114..026ba6d3 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -412,7 +412,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* write dummy data page */ - recover_data_page(sbi, NULL, &sum, src, dest); + f2fs_replace_block(sbi, &sum, src, dest, false); dn.data_blkaddr = dest; set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bd3e4deb..fa850154 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1323,32 +1323,41 @@ void rewrite_data_page(struct f2fs_io_info *fio) f2fs_submit_page_mbio(fio); } -void recover_data_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) +void f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr, + bool recover_curseg) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; unsigned int segno, old_cursegno; struct seg_entry *se; int type; + unsigned short old_blkoff; segno = GET_SEGNO(sbi, new_blkaddr); se = get_seg_entry(sbi, segno); type = se->type; - if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { - if (old_blkaddr == NULL_ADDR) - type = CURSEG_COLD_DATA; - else + if (!recover_curseg) { + /* for recovery flow */ + if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (old_blkaddr == NULL_ADDR) + type = CURSEG_COLD_DATA; + else + type = CURSEG_WARM_DATA; + } + } else { + if (!IS_CURSEG(sbi, segno)) type = CURSEG_WARM_DATA; } + curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); old_cursegno = curseg->segno; + old_blkoff = curseg->next_blkoff; /* change the current segment */ if (segno != curseg->segno) { @@ -1362,6 +1371,14 @@ void recover_data_page(struct f2fs_sb_info *sbi, refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); locate_dirty_segment(sbi, old_cursegno); + if (recover_curseg) { + if (old_cursegno != curseg->segno) { + curseg->next_segno = old_cursegno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = old_blkoff; + } + mutex_unlock(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); } From 1f37a1a754609516eacbdb0b2fb0d726f1d079e9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 May 2015 13:09:46 +0800 Subject: [PATCH 259/321] f2fs: support FALLOC_FL_COLLAPSE_RANGE Now, FALLOC_FL_COLLAPSE_RANGE flag in ->fallocate is supported in ext4/xfs. In commit, the semantics of this flag is descripted as following:" 1) It collapses the range lying between offset and length by removing any data blocks which are present in this range and than updates all the logical offsets of extents beyond "offset + len" to nullify the hole created by removing blocks. In short, it does not leave a hole. 2) It should be used exclusively. No other fallocate flag in combination. 3) Offset and length supplied to fallocate should be fs block size aligned in case of xfs and ext4. 4) Collaspe range does not work beyond i_size." This patch implements fallocate's FALLOC_FL_COLLAPSE_RANGE for f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b95a25e6..90399a56 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -782,6 +782,132 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) return ret; } +static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + int ret = 0; + + f2fs_lock_op(sbi); + + for (; end < nrpages; start++, end++) { + block_t new_addr, old_addr; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) { + goto out; + } else if (ret == -ENOENT) { + new_addr = NULL_ADDR; + } else { + new_addr = dn.data_blkaddr; + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + + if (new_addr == NULL_ADDR) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) + goto out; + else if (ret == -ENOENT) + continue; + + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_dnode(&dn); + continue; + } else { + truncate_data_blocks_range(&dn, 1); + } + + f2fs_put_dnode(&dn); + } else { + struct page *ipage; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, start); + if (ret) + goto out; + + old_addr = dn.data_blkaddr; + if (old_addr != NEW_ADDR && new_addr == NEW_ADDR) { + dn.data_blkaddr = NULL_ADDR; + f2fs_update_extent_cache(&dn); + invalidate_blocks(sbi, old_addr); + + dn.data_blkaddr = new_addr; + set_data_blkaddr(&dn); + } else if (new_addr != NEW_ADDR) { + struct node_info ni; + struct f2fs_summary sum; + + get_node_info(sbi, dn.nid, &ni); + set_summary(&sum, dn.nid, dn.ofs_in_node, + ni.version); + + f2fs_replace_block(sbi, &sum, old_addr, + new_addr, true); + + dn.data_blkaddr = new_addr; + set_data_blkaddr(&dn); + f2fs_update_extent_cache(&dn); + } + + f2fs_put_dnode(&dn); + } + } + ret = 0; +out: + f2fs_unlock_op(sbi); + return ret; +} + +static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) +{ + pgoff_t pg_start, pg_end; + loff_t new_size; + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (offset + len >= i_size_read(inode)) + return -EINVAL; + + /* collapse range should be aligned to block size of f2fs. */ + if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + pg_start = offset >> PAGE_CACHE_SHIFT; + pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + + /* write out all dirty pages from offset */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); + if (ret) + return ret; + + truncate_pagecache(inode, 0, offset); + + ret = f2fs_do_collapse(inode, pg_start, pg_end); + if (ret) + return ret; + + new_size = i_size_read(inode) - len; + + ret = truncate_blocks(inode, new_size, true); + if (!ret) + i_size_write(inode, new_size); + + return ret; +} + static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { @@ -843,13 +969,16 @@ static int expand_inode_data(struct inode *inode, loff_t offset, return ret; } +#define FALLOC_FL_COLLAPSE_RANGE 0X08 + static long f2fs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); long ret = 0; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); @@ -859,9 +988,11 @@ static long f2fs_fallocate(struct file *file, int mode, goto out; ret = punch_hole(inode, offset, len); - } - else + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + ret = f2fs_collapse_range(inode, offset, len); + } else { ret = expand_inode_data(inode, offset, len, mode); + } if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; From b19e1df2cd608fa9b7b5d4448412c8a129c66288 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 May 2015 13:11:13 +0800 Subject: [PATCH 260/321] f2fs: support FALLOC_FL_ZERO_RANGE Now, FALLOC_FL_ZERO_RANGE flag in ->fallocate is supported in ext4/xfs. In commit, the semantics of this flag is descripted as following:" 1) Make sure that both offset and len are block size aligned. 2) Update the i_size of inode by len bytes. 3) Compute the file's logical block number against offset. If the computed block number is not the starting block of the extent, split the extent such that the block number is the starting block of the extent. 4) Shift all the extents which are lying between [offset, last allocated extent] towards right by len bytes. This step will make a hole of len bytes at offset." This patch implements fallocate's FALLOC_FL_ZERO_RANGE for f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 90399a56..2a62e9fd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -908,6 +908,108 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) return ret; } +static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, + int mode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct address_space *mapping = inode->i_mapping; + pgoff_t index, pg_start, pg_end; + loff_t new_size = i_size_read(inode); + loff_t off_start, off_end; + int ret = 0; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) + return ret; + + f2fs_balance_fs(sbi); + + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } + + ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); + if (ret) + return ret; + + truncate_pagecache_range(inode, offset, offset + len - 1); + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + if (pg_start == pg_end) { + fill_zero(inode, pg_start, off_start, off_end - off_start); + if (offset + len > new_size) + new_size = offset + len; + new_size = max_t(loff_t, new_size, offset + len); + } else { + if (off_start) { + fill_zero(inode, pg_start++, off_start, + PAGE_CACHE_SIZE - off_start); + new_size = max_t(loff_t, new_size, + pg_start << PAGE_CACHE_SHIFT); + } + + for (index = pg_start; index < pg_end; index++) { + struct dnode_of_data dn; + struct page *ipage; + + f2fs_lock_op(sbi); + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + f2fs_unlock_op(sbi); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, index); + if (ret) { + f2fs_unlock_op(sbi); + goto out; + } + + if (dn.data_blkaddr != NEW_ADDR) { + invalidate_blocks(sbi, dn.data_blkaddr); + + dn.data_blkaddr = NEW_ADDR; + set_data_blkaddr(&dn); + + dn.data_blkaddr = NULL_ADDR; + f2fs_update_extent_cache(&dn); + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + + new_size = max_t(loff_t, new_size, + (index + 1) << PAGE_CACHE_SHIFT); + } + + if (off_end) { + fill_zero(inode, pg_end, 0, off_end); + new_size = max_t(loff_t, new_size, offset + len); + } + } + +out: + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + return ret; +} + static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { @@ -970,6 +1072,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, } #define FALLOC_FL_COLLAPSE_RANGE 0X08 +#define FALLOC_FL_ZERO_RANGE 0X10 static long f2fs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) @@ -978,7 +1081,7 @@ static long f2fs_fallocate(struct file *file, int mode, long ret = 0; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_COLLAPSE_RANGE)) + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); @@ -990,6 +1093,8 @@ static long f2fs_fallocate(struct file *file, int mode, ret = punch_hole(inode, offset, len); } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { ret = f2fs_collapse_range(inode, offset, len); + } else if (mode & FALLOC_FL_ZERO_RANGE) { + ret = f2fs_zero_range(inode, offset, len, mode); } else { ret = expand_inode_data(inode, offset, len, mode); } From 9d25d5384f724bdf6870c5948657cf69434f91b3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 May 2015 16:37:28 -0700 Subject: [PATCH 261/321] f2fs: avoid value overflow in showing current status This patch fixes overflow when do cat /sys/kernel/debug/f2fs/status. If a section is relatively large, dist value can be overflowed. Reported-by: Yossi Goldfill Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index f50acbc5..efbc83f0 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -94,7 +94,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) static void update_sit_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; + unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; + unsigned long long bimodal, dist; unsigned int segno, vblocks; int ndirty = 0; From 63236e82c08a9a05d93c55783061884605ea3bdd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 May 2015 19:30:32 -0700 Subject: [PATCH 262/321] f2fs: report unwritten area in f2fs_fiemap This patch slightly changes f2fs_fiemap function to report unwritten area. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/f2fs/f2fs.h | 4 +- 2 files changed, 117 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d489d845..e14762c7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1242,6 +1242,8 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, if (dn.data_blkaddr != NULL_ADDR) { map->m_flags = F2FS_MAP_MAPPED; map->m_pblk = dn.data_blkaddr; + if (dn.data_blkaddr == NEW_ADDR) + map->m_flags |= F2FS_MAP_UNWRITTEN; } else if (create) { err = __allocate_data_block(&dn); if (err) @@ -1289,7 +1291,10 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, blkaddr = dn.data_blkaddr; } /* Give more consecutive addresses for the readahead */ - if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) { + if ((map->m_pblk != NEW_ADDR && + blkaddr == (map->m_pblk + ofs)) || + (map->m_pblk == NEW_ADDR && + blkaddr == NEW_ADDR)) { ofs++; dn.ofs_in_node++; pgofs++; @@ -1340,11 +1345,117 @@ static int get_data_block_fiemap(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, true); } +static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) +{ + return (offset >> inode->i_blkbits); +} + +static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) +{ + return (blk << inode->i_blkbits); +} + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { - return generic_block_fiemap(inode, fieinfo, - start, len, get_data_block_fiemap); + struct buffer_head map_bh; + sector_t start_blk, last_blk; + loff_t isize = i_size_read(inode); + u64 logical = 0, phys = 0, size = 0; + u32 flags = 0; + bool past_eof = false, whole_file = false; + int ret = 0; + + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + + if (len >= isize) { + whole_file = true; + len = isize; + } + + if (logical_to_blk(inode, len) == 0) + len = blk_to_logical(inode, 1); + + start_blk = logical_to_blk(inode, start); + last_blk = logical_to_blk(inode, start + len - 1); +next: + memset(&map_bh, 0, sizeof(struct buffer_head)); + map_bh.b_size = len; + + ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0); + if (ret) + goto out; + + /* HOLE */ + if (!buffer_mapped(&map_bh)) { + start_blk++; + + if (!past_eof && blk_to_logical(inode, start_blk) >= isize) + past_eof = 1; + + if (past_eof && size) { + flags |= FIEMAP_EXTENT_LAST; + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + } else if (size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + size = 0; + } + + /* if we have holes up to/past EOF then we're done */ + if (start_blk > last_blk || past_eof || ret) + goto out; + } else { + if (start_blk > last_blk && !whole_file) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + goto out; + } + + /* + * if size != 0 then we know we already have an extent + * to add, so add it. + */ + if (size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + if (ret) + goto out; + } + + logical = blk_to_logical(inode, start_blk); + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = map_bh.b_size; + flags = 0; + if (buffer_unwritten(&map_bh)) + flags = FIEMAP_EXTENT_UNWRITTEN; + + start_blk += logical_to_blk(inode, size); + + /* + * If we are past the EOF, then we need to make sure as + * soon as we find a hole that the last extent we found + * is marked with FIEMAP_EXTENT_LAST + */ + if (!past_eof && logical + size >= isize) + past_eof = true; + } + cond_resched(); + if (fatal_signal_pending(current)) + ret = -EINTR; + else + goto next; +out: + if (ret == 1) + ret = 0; + + mutex_unlock(&inode->i_mutex); + return ret; } /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 112d6cd8..b11c7bea 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -329,7 +329,9 @@ struct extent_tree { */ #define F2FS_MAP_NEW (1 << BH_New) #define F2FS_MAP_MAPPED (1 << BH_Mapped) -#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED) +#define F2FS_MAP_UNWRITTEN (1 << BH_Unwritten) +#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED |\ + F2FS_MAP_UNWRITTEN) struct f2fs_map_blocks { block_t m_pblk; From 325c889bf414e9a5fc1760a8c02676c958d3ad71 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 13:57:51 -0700 Subject: [PATCH 263/321] f2fs crypto: declare some definitions for f2fs encryption feature This definitions will be used by inode and superblock for encyption. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/f2fs.h Change-Id: I184949a8daa1ec55d7fad3e88b71a1f56eb5d4e3 --- fs/f2fs/f2fs.h | 55 +++++++++++++++ fs/f2fs/f2fs_crypto.h | 147 ++++++++++++++++++++++++++++++++++++++++ include/linux/f2fs_fs.h | 4 +- 3 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/f2fs_crypto.h diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b11c7bea..7adf90de 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -70,6 +70,8 @@ struct f2fs_mount_info { unsigned int opt; }; +#define F2FS_FEATURE_ENCRYPT 0x0001 + #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) #define F2FS_SET_FEATURE(sb, mask) \ @@ -345,6 +347,7 @@ struct f2fs_map_blocks { */ #define FADVISE_COLD_BIT 0x01 #define FADVISE_LOST_PINO_BIT 0x02 +#define FADVISE_ENCRYPT_BIT 0x04 #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -352,6 +355,16 @@ struct f2fs_map_blocks { #define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) #define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) #define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) +#define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT) +#define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT) +#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) + +/* Encryption algorithms */ +#define F2FS_ENCRYPTION_MODE_INVALID 0 +#define F2FS_ENCRYPTION_MODE_AES_256_XTS 1 +#define F2FS_ENCRYPTION_MODE_AES_256_GCM 2 +#define F2FS_ENCRYPTION_MODE_AES_256_CBC 3 +#define F2FS_ENCRYPTION_MODE_AES_256_CTS 4 #define DEF_DIR_LEVEL 0 @@ -379,6 +392,11 @@ struct f2fs_inode_info { struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ + +#ifdef CONFIG_F2FS_FS_ENCRYPTION + /* Encryption params */ + struct f2fs_crypt_info *i_crypt_info; +#endif }; static inline void get_extent_info(struct extent_info *ext, @@ -1890,4 +1908,41 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); int f2fs_read_inline_dir(struct file *, void *, filldir_t); + +/* + * crypto support + */ +static inline int f2fs_encrypted_inode(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return file_is_encrypt(inode); +#else + return 0; +#endif +} + +static inline void f2fs_set_encrypted_inode(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + file_set_encrypt(inode); +#endif +} + +static inline bool f2fs_bio_encrypted(struct bio *bio) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return unlikely(bio->bi_private != NULL); +#else + return false; +#endif +} + +static inline int f2fs_sb_has_crypto(struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); +#else + return 0; +#endif +} #endif diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h new file mode 100644 index 00000000..3323266e --- /dev/null +++ b/fs/f2fs/f2fs_crypto.h @@ -0,0 +1,147 @@ +/* + * linux/fs/f2fs/f2fs_crypto.h + * + * Copied from linux/fs/ext4/ext4_crypto.h + * + * Copyright (C) 2015, Google, Inc. + * + * This contains encryption header content for f2fs + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ +#ifndef _F2FS_CRYPTO_H +#define _F2FS_CRYPTO_H + +#include + +#define F2FS_KEY_DESCRIPTOR_SIZE 8 + +/* Policy provided via an ioctl on the topmost directory */ +struct f2fs_encryption_policy { + char version; + char contents_encryption_mode; + char filenames_encryption_mode; + char flags; + char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE]; +} __attribute__((__packed__)); + +#define F2FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 +#define F2FS_KEY_DERIVATION_NONCE_SIZE 16 + +#define F2FS_POLICY_FLAGS_PAD_4 0x00 +#define F2FS_POLICY_FLAGS_PAD_8 0x01 +#define F2FS_POLICY_FLAGS_PAD_16 0x02 +#define F2FS_POLICY_FLAGS_PAD_32 0x03 +#define F2FS_POLICY_FLAGS_PAD_MASK 0x03 +#define F2FS_POLICY_FLAGS_VALID 0x03 + +/** + * Encryption context for inode + * + * Protector format: + * 1 byte: Protector format (1 = this version) + * 1 byte: File contents encryption mode + * 1 byte: File names encryption mode + * 1 byte: Flags + * 8 bytes: Master Key descriptor + * 16 bytes: Encryption Key derivation nonce + */ +struct f2fs_encryption_context { + char format; + char contents_encryption_mode; + char filenames_encryption_mode; + char flags; + char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE]; + char nonce[F2FS_KEY_DERIVATION_NONCE_SIZE]; +} __attribute__((__packed__)); + +/* Encryption parameters */ +#define F2FS_XTS_TWEAK_SIZE 16 +#define F2FS_AES_128_ECB_KEY_SIZE 16 +#define F2FS_AES_256_GCM_KEY_SIZE 32 +#define F2FS_AES_256_CBC_KEY_SIZE 32 +#define F2FS_AES_256_CTS_KEY_SIZE 32 +#define F2FS_AES_256_XTS_KEY_SIZE 64 +#define F2FS_MAX_KEY_SIZE 64 + +struct f2fs_encryption_key { + __u32 mode; + char raw[F2FS_MAX_KEY_SIZE]; + __u32 size; +} __attribute__((__packed__)); + +struct f2fs_crypt_info { + unsigned char ci_mode; + unsigned char ci_size; + char ci_data_mode; + char ci_filename_mode; + char ci_flags; + struct crypto_ablkcipher *ci_ctfm; + struct key *ci_keyring_key; + char ci_raw[F2FS_MAX_KEY_SIZE]; + char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE]; +}; + +#define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 +#define F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002 + +struct f2fs_crypto_ctx { + struct crypto_tfm *tfm; /* Crypto API context */ + struct page *bounce_page; /* Ciphertext page on write path */ + struct page *control_page; /* Original page on write path */ + struct bio *bio; /* The bio for this context */ + struct work_struct work; /* Work queue for read complete path */ + struct list_head free_list; /* Free list */ + int flags; /* Flags */ + int mode; /* Encryption mode for tfm */ +}; + +struct f2fs_completion_result { + struct completion completion; + int res; +}; + +#define DECLARE_F2FS_COMPLETION_RESULT(ecr) \ + struct f2fs_completion_result ecr = { \ + COMPLETION_INITIALIZER((ecr).completion), 0 } + +static inline int f2fs_encryption_key_size(int mode) +{ + switch (mode) { + case F2FS_ENCRYPTION_MODE_AES_256_XTS: + return F2FS_AES_256_XTS_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_GCM: + return F2FS_AES_256_GCM_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_CBC: + return F2FS_AES_256_CBC_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_CTS: + return F2FS_AES_256_CTS_KEY_SIZE; + default: + BUG(); + } + return 0; +} + +#define F2FS_FNAME_NUM_SCATTER_ENTRIES 4 +#define F2FS_CRYPTO_BLOCK_SIZE 16 +#define F2FS_FNAME_CRYPTO_DIGEST_SIZE 32 + +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct f2fs_encrypted_symlink_data { + __le16 len; + char encrypted_path[1]; +} __attribute__((__packed__)); + +/** + * This function is used to calculate the disk space required to + * store a filename of length l in encrypted symlink format. + */ +static inline u32 encrypted_symlink_data_len(u32 l) +{ + return (l + sizeof(struct f2fs_encrypted_symlink_data) - 1); +} +#endif /* _F2FS_CRYPTO_H */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d44e97f2..920408a2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -91,7 +91,9 @@ struct f2fs_super_block { __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ __le32 feature; /* defined features */ - __u8 reserved[888]; /* valid reserved region */ + __u8 encryption_level; /* versioning level for encryption */ + __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ + __u8 reserved[871]; /* valid reserved region */ } __packed; /* From b123ef307f45e75104d95e20b779cbe5da009ec3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 10 Apr 2015 16:28:26 -0700 Subject: [PATCH 264/321] f2fs crypto: add f2fs encryption Kconfig This patch adds f2fs encryption config. This patch integrates: "ext4 crypto: require CONFIG_CRYPTO_CTR if ext4 encryption is enabled On arm64 this is apparently needed for CTS mode to function correctly. Otherwise attempts to use CTS return ENOENT." Signed-off-by: Michael Halcrow Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index dc132c54..363abf9e 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -72,6 +72,25 @@ config F2FS_CHECK_FS If you want to improve the performance, say N. +config F2FS_FS_ENCRYPTION + bool "F2FS Encryption" + depends on F2FS_FS + depends on F2FS_FS_XATTR + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_ECB + select CRYPTO_XTS + select CRYPTO_CTS + select CRYPTO_CTR + select CRYPTO_SHA256 + select KEYS + select ENCRYPTED_KEYS + help + Enable encryption of f2fs files and directories. This + feature is similar to ecryptfs, but it is more memory + efficient since it avoids caching the encrypted and + decrypted pages in the page cache. + config F2FS_IO_TRACE bool "F2FS IO tracer" depends on F2FS_FS From aad96b035db35a478f7b9949cd0c1b72071efe07 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 10 Apr 2015 16:43:31 -0700 Subject: [PATCH 265/321] f2fs crypto: add encryption xattr support This patch add some definition for enrcyption xattr. Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 95b55a0c..47cf0e58 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -35,6 +35,10 @@ #define F2FS_XATTR_INDEX_LUSTRE 5 #define F2FS_XATTR_INDEX_SECURITY 6 #define F2FS_XATTR_INDEX_ADVISE 7 +/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */ +#define F2FS_XATTR_INDEX_ENCRYPTION 9 + +#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" struct f2fs_xattr_header { __le32 h_magic; /* magic number for identification */ From 63ab5a70a09610e4f00b1bc75916595d26db24d4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 15:19:06 -0700 Subject: [PATCH 266/321] f2fs crypto: add encryption policy and password salt support This patch adds encryption policy and password salt support through ioctl implementation. It adds three ioctls: F2FS_IOC_SET_ENCRYPTION_POLICY, F2FS_IOC_GET_ENCRYPTION_POLICY, F2FS_IOC_GET_ENCRYPTION_PWSALT, which use xattr operations. Note that, these definition and codes are taken from ext4 crypto support. For f2fs, xattr operations and on-disk flags for superblock and inode were changed. Signed-off-by: Michael Halcrow Signed-off-by: Theodore Ts'o Signed-off-by: Ildar Muslukhov Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/f2fs.h Change-Id: I5035631831f7ed7c0893c05f2ad916017c293199 --- fs/f2fs/Makefile | 1 + fs/f2fs/crypto_policy.c | 206 ++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 16 ++++ fs/f2fs/file.c | 93 ++++++++++++++++++ fs/f2fs/xattr.c | 3 + 5 files changed, 319 insertions(+) create mode 100644 fs/f2fs/crypto_policy.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index d9239773..7864f4f0 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -6,3 +6,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c new file mode 100644 index 00000000..bef254b2 --- /dev/null +++ b/fs/f2fs/crypto_policy.c @@ -0,0 +1,206 @@ +/* + * copied from linux/fs/ext4/crypto_policy.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility. + * + * This contains encryption policy functions for f2fs with some modifications + * to support f2fs-specific xattr APIs. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +static int f2fs_inode_has_encryption_context(struct inode *inode) +{ + int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0, NULL); + return (res > 0); +} + +/* + * check whether the policy is consistent with the encryption context + * for the inode + */ +static int f2fs_is_encryption_context_consistent_with_policy( + struct inode *inode, const struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), NULL); + + if (res != sizeof(ctx)) + return 0; + + return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE) == 0 && + (ctx.flags == policy->flags) && + (ctx.contents_encryption_mode == + policy->contents_encryption_mode) && + (ctx.filenames_encryption_mode == + policy->filenames_encryption_mode)); +} + +static int f2fs_create_encryption_context_from_policy( + struct inode *inode, const struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + + ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1; + memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE); + + if (!f2fs_valid_contents_enc_mode(policy->contents_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid contents encryption mode %d\n", __func__, + policy->contents_encryption_mode); + return -EINVAL; + } + + if (!f2fs_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid filenames encryption mode %d\n", __func__, + policy->filenames_encryption_mode); + return -EINVAL; + } + + if (policy->flags & ~F2FS_POLICY_FLAGS_VALID) + return -EINVAL; + + ctx.contents_encryption_mode = policy->contents_encryption_mode; + ctx.filenames_encryption_mode = policy->filenames_encryption_mode; + ctx.flags = policy->flags; + BUILD_BUG_ON(sizeof(ctx.nonce) != F2FS_KEY_DERIVATION_NONCE_SIZE); + get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); + + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), NULL, 0); +} + +int f2fs_process_policy(const struct f2fs_encryption_policy *policy, + struct inode *inode) +{ + if (policy->version != 0) + return -EINVAL; + + if (!f2fs_inode_has_encryption_context(inode)) { + if (!f2fs_empty_dir(inode)) + return -ENOTEMPTY; + return f2fs_create_encryption_context_from_policy(inode, + policy); + } + + if (f2fs_is_encryption_context_consistent_with_policy(inode, policy)) + return 0; + + printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", + __func__); + return -EINVAL; +} + +int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + int res; + + if (!f2fs_encrypted_inode(inode)) + return -ENODATA; + + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + &ctx, sizeof(ctx), NULL); + if (res != sizeof(ctx)) + return -ENODATA; + if (ctx.format != F2FS_ENCRYPTION_CONTEXT_FORMAT_V1) + return -EINVAL; + + policy->version = 0; + policy->contents_encryption_mode = ctx.contents_encryption_mode; + policy->filenames_encryption_mode = ctx.filenames_encryption_mode; + policy->flags = ctx.flags; + memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE); + return 0; +} + +int f2fs_is_child_context_consistent_with_parent(struct inode *parent, + struct inode *child) +{ + struct f2fs_crypt_info *parent_ci, *child_ci; + int res; + + if ((parent == NULL) || (child == NULL)) { + pr_err("parent %p child %p\n", parent, child); + BUG_ON(1); + } + + /* no restrictions if the parent directory is not encrypted */ + if (!f2fs_encrypted_inode(parent)) + return 1; + /* if the child directory is not encrypted, this is always a problem */ + if (!f2fs_encrypted_inode(child)) + return 0; + res = f2fs_get_encryption_info(parent); + if (res) + return 0; + res = f2fs_get_encryption_info(child); + if (res) + return 0; + parent_ci = F2FS_I(parent)->i_crypt_info; + child_ci = F2FS_I(child)->i_crypt_info; + if (!parent_ci && !child_ci) + return 1; + if (!parent_ci || !child_ci) + return 0; + + return (memcmp(parent_ci->ci_master_key, + child_ci->ci_master_key, + F2FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags)); +} + +/** + * f2fs_inherit_context() - Sets a child context from its parent + * @parent: Parent inode from which the context is inherited. + * @child: Child inode that inherits the context from @parent. + * + * Return: Zero on success, non-zero otherwise + */ +int f2fs_inherit_context(struct inode *parent, struct inode *child, + struct page *ipage) +{ + struct f2fs_encryption_context ctx; + struct f2fs_crypt_info *ci; + int res; + + res = f2fs_get_encryption_info(parent); + if (res < 0) + return res; + + ci = F2FS_I(parent)->i_crypt_info; + BUG_ON(ci == NULL); + + ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1; + + ctx.contents_encryption_mode = ci->ci_data_mode; + ctx.filenames_encryption_mode = ci->ci_filename_mode; + ctx.flags = ci->ci_flags; + memcpy(ctx.master_key_descriptor, ci->ci_master_key, + F2FS_KEY_DESCRIPTOR_SIZE); + + get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); + return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), ipage, 0); +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7adf90de..ab414ba2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -237,6 +237,13 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_SET_ENCRYPTION_POLICY \ + _IOR('f', 19, struct f2fs_encryption_policy) +#define F2FS_IOC_GET_ENCRYPTION_PWSALT \ + _IOW('f', 20, __u8[16]) +#define F2FS_IOC_GET_ENCRYPTION_POLICY \ + _IOW('f', 21, struct f2fs_encryption_policy) + #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* * ioctl commands in 32 bit emulation @@ -366,6 +373,8 @@ struct f2fs_map_blocks { #define F2FS_ENCRYPTION_MODE_AES_256_CBC 3 #define F2FS_ENCRYPTION_MODE_AES_256_CTS 4 +#include "f2fs_crypto.h" + #define DEF_DIR_LEVEL 0 struct f2fs_inode_info { @@ -1945,4 +1954,11 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb) return 0; #endif } + +/* crypto_policy.c */ +int f2fs_is_child_context_consistent_with_parent(struct inode *, + struct inode *); +int f2fs_inherit_context(struct inode *, struct inode *, struct page *); +int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *); +int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2a62e9fd..8fe79429 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -1365,6 +1366,92 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) return 0; } +static bool uuid_is_nonzero(__u8 u[16]) +{ + int i; + + for (i = 0; i < 16; i++) + if (u[i]) + return true; + return false; +} + +static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_encryption_policy policy; + struct inode *inode = file_inode(filp); + + if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg, + sizeof(policy))) + return -EFAULT; + + if (f2fs_has_inline_data(inode)) { + int ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } + + return f2fs_process_policy(&policy, inode); +#else + return -EOPNOTSUPP; +#endif +} + +static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_encryption_policy policy; + struct inode *inode = file_inode(filp); + int err; + + err = f2fs_get_policy(inode, &policy); + if (err) + return err; + + if (copy_to_user((struct f2fs_encryption_policy __user *)arg, &policy, + sizeof(policy))) + return -EFAULT; + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int err; + + if (!f2fs_sb_has_crypto(inode->i_sb)) + return -EOPNOTSUPP; + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + + err = mnt_want_write_file(filp); + if (err) + return err; + + /* update superblock with uuid */ + generate_random_uuid(sbi->raw_super->encrypt_pw_salt); + + err = f2fs_commit_super(sbi); + + mnt_drop_write_file(filp); + if (err) { + /* undo new data */ + memset(sbi->raw_super->encrypt_pw_salt, 0, 16); + return err; + } +got_it: + if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, + 16)) + return -EFAULT; + return 0; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -1388,6 +1475,12 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_shutdown(filp, arg); case FITRIM: return f2fs_ioc_fitrim(filp, arg); + case F2FS_IOC_SET_ENCRYPTION_POLICY: + return f2fs_ioc_set_encryption_policy(filp, arg); + case F2FS_IOC_GET_ENCRYPTION_POLICY: + return f2fs_ioc_get_encryption_policy(filp, arg); + case F2FS_IOC_GET_ENCRYPTION_PWSALT: + return f2fs_ioc_get_encryption_pwsalt(filp, arg); default: return -ENOTTY; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 0569d5f8..43da08a3 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -583,6 +583,9 @@ static int __f2fs_setxattr(struct inode *inode, int index, inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } + if (index == F2FS_XATTR_INDEX_ENCRYPTION && + !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) + f2fs_set_encrypted_inode(inode); if (ipage) update_inode(inode, ipage); From aab3150c143b756e4844b7c264f56e14bcdb3731 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 19:52:47 -0700 Subject: [PATCH 267/321] f2fs crypto: add f2fs encryption facilities Most of parts were copied from ext4, except: - add f2fs_restore_and_release_control_page which returns control page and restore control page - remove ext4_encrypted_zeroout() - remove sbi->s_file_encryption_mode & sbi->s_dir_encryption_mode - add f2fs_end_io_crypto_work for mpage_end_io Signed-off-by: Michael Halcrow Signed-off-by: Ildar Muslukhov Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 2 +- fs/f2fs/crypto.c | 560 +++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 25 +++ fs/f2fs/super.c | 3 + 4 files changed, 589 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/crypto.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 7864f4f0..a79907b4 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -6,4 +6,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o -f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c new file mode 100644 index 00000000..c910fa72 --- /dev/null +++ b/fs/f2fs/crypto.c @@ -0,0 +1,560 @@ +/* + * linux/fs/f2fs/crypto.c + * + * Copied from linux/fs/ext4/crypto.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * This contains encryption functions for f2fs + * + * Written by Michael Halcrow, 2014. + * + * Filename encryption additions + * Uday Savagaonkar, 2014 + * Encryption policy handling additions + * Ildar Muslukhov, 2014 + * Remove ext4_encrypted_zeroout(), + * add f2fs_restore_and_release_control_page() + * Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + * + * The usage of AES-XTS should conform to recommendations in NIST + * Special Publication 800-38E and IEEE P1619/D16. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +/* Encryption added and removed here! (L: */ + +static unsigned int num_prealloc_crypto_pages = 32; +static unsigned int num_prealloc_crypto_ctxs = 128; + +module_param(num_prealloc_crypto_pages, uint, 0444); +MODULE_PARM_DESC(num_prealloc_crypto_pages, + "Number of crypto pages to preallocate"); +module_param(num_prealloc_crypto_ctxs, uint, 0444); +MODULE_PARM_DESC(num_prealloc_crypto_ctxs, + "Number of crypto contexts to preallocate"); + +static mempool_t *f2fs_bounce_page_pool; + +static LIST_HEAD(f2fs_free_crypto_ctxs); +static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock); + +struct workqueue_struct *f2fs_read_workqueue; +static DEFINE_MUTEX(crypto_init); + +/** + * f2fs_release_crypto_ctx() - Releases an encryption context + * @ctx: The encryption context to release. + * + * If the encryption context was allocated from the pre-allocated pool, returns + * it to that pool. Else, frees it. + * + * If there's a bounce page in the context, this frees that. + */ +void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) +{ + unsigned long flags; + + if (ctx->bounce_page) { + if (ctx->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) + __free_page(ctx->bounce_page); + else + mempool_free(ctx->bounce_page, f2fs_bounce_page_pool); + ctx->bounce_page = NULL; + } + ctx->control_page = NULL; + if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { + if (ctx->tfm) + crypto_free_tfm(ctx->tfm); + kfree(ctx); + } else { + spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); + list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); + spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); + } +} + +/** + * f2fs_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context + * @mask: The allocation mask. + * + * Return: An allocated and initialized encryption context on success. An error + * value or NULL otherwise. + */ +static struct f2fs_crypto_ctx *f2fs_alloc_and_init_crypto_ctx(gfp_t mask) +{ + struct f2fs_crypto_ctx *ctx = kzalloc(sizeof(struct f2fs_crypto_ctx), + mask); + + if (!ctx) + return ERR_PTR(-ENOMEM); + return ctx; +} + +/** + * f2fs_get_crypto_ctx() - Gets an encryption context + * @inode: The inode for which we are doing the crypto + * + * Allocates and initializes an encryption context. + * + * Return: An allocated and initialized encryption context on success; error + * value or NULL otherwise. + */ +struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) +{ + struct f2fs_crypto_ctx *ctx = NULL; + int res = 0; + unsigned long flags; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + BUG_ON(ci == NULL); + /* + * We first try getting the ctx from a free list because in + * the common case the ctx will have an allocated and + * initialized crypto tfm, so it's probably a worthwhile + * optimization. For the bounce page, we first try getting it + * from the kernel allocator because that's just about as fast + * as getting it from a list and because a cache of free pages + * should generally be a "last resort" option for a filesystem + * to be able to do its job. + */ + spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); + ctx = list_first_entry_or_null(&f2fs_free_crypto_ctxs, + struct f2fs_crypto_ctx, free_list); + if (ctx) + list_del(&ctx->free_list); + spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); + if (!ctx) { + ctx = f2fs_alloc_and_init_crypto_ctx(GFP_NOFS); + if (IS_ERR(ctx)) { + res = PTR_ERR(ctx); + goto out; + } + ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; + } else { + ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; + } + + /* + * Allocate a new Crypto API context if we don't already have + * one or if it isn't the right mode. + */ + BUG_ON(ci->ci_mode == F2FS_ENCRYPTION_MODE_INVALID); + if (ctx->tfm && (ctx->mode != ci->ci_mode)) { + crypto_free_tfm(ctx->tfm); + ctx->tfm = NULL; + ctx->mode = F2FS_ENCRYPTION_MODE_INVALID; + } + if (!ctx->tfm) { + switch (ci->ci_mode) { + case F2FS_ENCRYPTION_MODE_AES_256_XTS: + ctx->tfm = crypto_ablkcipher_tfm( + crypto_alloc_ablkcipher("xts(aes)", 0, 0)); + break; + case F2FS_ENCRYPTION_MODE_AES_256_GCM: + /* + * TODO(mhalcrow): AEAD w/ gcm(aes); + * crypto_aead_setauthsize() + */ + ctx->tfm = ERR_PTR(-ENOTSUPP); + break; + default: + BUG(); + } + if (IS_ERR_OR_NULL(ctx->tfm)) { + res = PTR_ERR(ctx->tfm); + ctx->tfm = NULL; + goto out; + } + ctx->mode = ci->ci_mode; + } + BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_mode)); + + /* + * There shouldn't be a bounce page attached to the crypto + * context at this point. + */ + BUG_ON(ctx->bounce_page); + +out: + if (res) { + if (!IS_ERR_OR_NULL(ctx)) + f2fs_release_crypto_ctx(ctx); + ctx = ERR_PTR(res); + } + return ctx; +} + +/* + * Call f2fs_decrypt on every single page, reusing the encryption + * context. + */ +static void completion_pages(struct work_struct *work) +{ + struct f2fs_crypto_ctx *ctx = + container_of(work, struct f2fs_crypto_ctx, work); + struct bio *bio = ctx->bio; + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + int ret = f2fs_decrypt(ctx, page); + + if (ret) { + WARN_ON_ONCE(1); + SetPageError(page); + } else + SetPageUptodate(page); + unlock_page(page); + } + f2fs_release_crypto_ctx(ctx); + bio_put(bio); +} + +void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio) +{ + INIT_WORK(&ctx->work, completion_pages); + ctx->bio = bio; + queue_work(f2fs_read_workqueue, &ctx->work); +} + +/** + * f2fs_exit_crypto() - Shutdown the f2fs encryption system + */ +void f2fs_exit_crypto(void) +{ + struct f2fs_crypto_ctx *pos, *n; + + list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) { + if (pos->bounce_page) { + if (pos->flags & + F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) + __free_page(pos->bounce_page); + else + mempool_free(pos->bounce_page, + f2fs_bounce_page_pool); + } + if (pos->tfm) + crypto_free_tfm(pos->tfm); + kfree(pos); + } + INIT_LIST_HEAD(&f2fs_free_crypto_ctxs); + if (f2fs_bounce_page_pool) + mempool_destroy(f2fs_bounce_page_pool); + f2fs_bounce_page_pool = NULL; + if (f2fs_read_workqueue) + destroy_workqueue(f2fs_read_workqueue); + f2fs_read_workqueue = NULL; +} + +/** + * f2fs_init_crypto() - Set up for f2fs encryption. + * + * We only call this when we start accessing encrypted files, since it + * results in memory getting allocated that wouldn't otherwise be used. + * + * Return: Zero on success, non-zero otherwise. + */ +int f2fs_init_crypto(void) +{ + int i, res; + + mutex_lock(&crypto_init); + if (f2fs_read_workqueue) + goto already_initialized; + + f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); + if (!f2fs_read_workqueue) { + res = -ENOMEM; + goto fail; + } + + for (i = 0; i < num_prealloc_crypto_ctxs; i++) { + struct f2fs_crypto_ctx *ctx; + + ctx = f2fs_alloc_and_init_crypto_ctx(GFP_KERNEL); + if (IS_ERR(ctx)) { + res = PTR_ERR(ctx); + goto fail; + } + list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); + } + + f2fs_bounce_page_pool = + mempool_create_page_pool(num_prealloc_crypto_pages, 0); + if (!f2fs_bounce_page_pool) { + res = -ENOMEM; + goto fail; + } +already_initialized: + mutex_unlock(&crypto_init); + return 0; +fail: + f2fs_exit_crypto(); + mutex_unlock(&crypto_init); + return res; +} + +void f2fs_restore_and_release_control_page(struct page **page) +{ + struct f2fs_crypto_ctx *ctx; + struct page *bounce_page; + + /* The bounce data pages are unmapped. */ + if ((*page)->mapping) + return; + + /* The bounce data page is unmapped. */ + bounce_page = *page; + ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page); + + /* restore control page */ + *page = ctx->control_page; + + f2fs_restore_control_page(bounce_page); +} + +void f2fs_restore_control_page(struct page *data_page) +{ + struct f2fs_crypto_ctx *ctx = + (struct f2fs_crypto_ctx *)page_private(data_page); + + set_page_private(data_page, (unsigned long)NULL); + ClearPagePrivate(data_page); + unlock_page(data_page); + f2fs_release_crypto_ctx(ctx); +} + +/** + * f2fs_crypt_complete() - The completion callback for page encryption + * @req: The asynchronous encryption request context + * @res: The result of the encryption operation + */ +static void f2fs_crypt_complete(struct crypto_async_request *req, int res) +{ + struct f2fs_completion_result *ecr = req->data; + + if (res == -EINPROGRESS) + return; + ecr->res = res; + complete(&ecr->completion); +} + +typedef enum { + F2FS_DECRYPT = 0, + F2FS_ENCRYPT, +} f2fs_direction_t; + +static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, + struct inode *inode, + f2fs_direction_t rw, + pgoff_t index, + struct page *src_page, + struct page *dest_page) +{ + u8 xts_tweak[F2FS_XTS_TWEAK_SIZE]; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist dst, src; + struct f2fs_inode_info *fi = F2FS_I(inode); + struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm); + int res = 0; + + BUG_ON(!ctx->tfm); + BUG_ON(ctx->mode != fi->i_crypt_info->ci_mode); + + if (ctx->mode != F2FS_ENCRYPTION_MODE_AES_256_XTS) { + printk_ratelimited(KERN_ERR + "%s: unsupported crypto algorithm: %d\n", + __func__, ctx->mode); + return -ENOTSUPP; + } + + crypto_ablkcipher_clear_flags(atfm, ~0); + crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY); + + res = crypto_ablkcipher_setkey(atfm, fi->i_crypt_info->ci_raw, + fi->i_crypt_info->ci_size); + if (res) { + printk_ratelimited(KERN_ERR + "%s: crypto_ablkcipher_setkey() failed\n", + __func__); + return res; + } + req = ablkcipher_request_alloc(atfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", + __func__); + return -ENOMEM; + } + ablkcipher_request_set_callback( + req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_crypt_complete, &ecr); + + BUILD_BUG_ON(F2FS_XTS_TWEAK_SIZE < sizeof(index)); + memcpy(xts_tweak, &index, sizeof(index)); + memset(&xts_tweak[sizeof(index)], 0, + F2FS_XTS_TWEAK_SIZE - sizeof(index)); + + sg_init_table(&dst, 1); + sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); + sg_init_table(&src, 1); + sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); + ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, + xts_tweak); + if (rw == F2FS_DECRYPT) + res = crypto_ablkcipher_decrypt(req); + else + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + ablkcipher_request_free(req); + if (res) { + printk_ratelimited(KERN_ERR + "%s: crypto_ablkcipher_encrypt() returned %d\n", + __func__, res); + return res; + } + return 0; +} + +/** + * f2fs_encrypt() - Encrypts a page + * @inode: The inode for which the encryption should take place + * @plaintext_page: The page to encrypt. Must be locked. + * + * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx + * encryption context. + * + * Called on the page write path. The caller must call + * f2fs_restore_control_page() on the returned ciphertext page to + * release the bounce buffer and the encryption context. + * + * Return: An allocated page with the encrypted content on success. Else, an + * error value or NULL. + */ +struct page *f2fs_encrypt(struct inode *inode, + struct page *plaintext_page) +{ + struct f2fs_crypto_ctx *ctx; + struct page *ciphertext_page = NULL; + int err; + + BUG_ON(!PageLocked(plaintext_page)); + + ctx = f2fs_get_crypto_ctx(inode); + if (IS_ERR(ctx)) + return (struct page *)ctx; + + /* The encryption operation will require a bounce page. */ + ciphertext_page = alloc_page(GFP_NOFS); + if (!ciphertext_page) { + /* + * This is a potential bottleneck, but at least we'll have + * forward progress. + */ + ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, + GFP_NOFS); + if (WARN_ON_ONCE(!ciphertext_page)) + ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, + GFP_NOFS | __GFP_WAIT); + ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; + } else { + ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; + } + ctx->bounce_page = ciphertext_page; + ctx->control_page = plaintext_page; + err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, + plaintext_page, ciphertext_page); + if (err) { + f2fs_release_crypto_ctx(ctx); + return ERR_PTR(err); + } + SetPagePrivate(ciphertext_page); + set_page_private(ciphertext_page, (unsigned long)ctx); + lock_page(ciphertext_page); + return ciphertext_page; +} + +/** + * f2fs_decrypt() - Decrypts a page in-place + * @ctx: The encryption context. + * @page: The page to decrypt. Must be locked. + * + * Decrypts page in-place using the ctx encryption context. + * + * Called from the read completion callback. + * + * Return: Zero on success, non-zero otherwise. + */ +int f2fs_decrypt(struct f2fs_crypto_ctx *ctx, struct page *page) +{ + BUG_ON(!PageLocked(page)); + + return f2fs_page_crypto(ctx, page->mapping->host, + F2FS_DECRYPT, page->index, page, page); +} + +/* + * Convenience function which takes care of allocating and + * deallocating the encryption context + */ +int f2fs_decrypt_one(struct inode *inode, struct page *page) +{ + struct f2fs_crypto_ctx *ctx = f2fs_get_crypto_ctx(inode); + int ret; + + if (!ctx) + return -ENOMEM; + ret = f2fs_decrypt(ctx, page); + f2fs_release_crypto_ctx(ctx); + return ret; +} + +bool f2fs_valid_contents_enc_mode(uint32_t mode) +{ + return (mode == F2FS_ENCRYPTION_MODE_AES_256_XTS); +} + +/** + * f2fs_validate_encryption_key_size() - Validate the encryption key size + * @mode: The key mode. + * @size: The key size to validate. + * + * Return: The validated key size for @mode. Zero if invalid. + */ +uint32_t f2fs_validate_encryption_key_size(uint32_t mode, uint32_t size) +{ + if (size == f2fs_encryption_key_size(mode)) + return size; + return 0; +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ab414ba2..d157bd26 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1961,4 +1961,29 @@ int f2fs_is_child_context_consistent_with_parent(struct inode *, int f2fs_inherit_context(struct inode *, struct inode *, struct page *); int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *); int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); + +/* crypt.c */ +extern struct workqueue_struct *f2fs_read_workqueue; +bool f2fs_valid_contents_enc_mode(uint32_t); +uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t); +struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *); +void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *); +struct page *f2fs_encrypt(struct inode *, struct page *); +int f2fs_decrypt(struct f2fs_crypto_ctx *, struct page *); +int f2fs_decrypt_one(struct inode *, struct page *); +void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +void f2fs_restore_and_release_control_page(struct page **); +void f2fs_restore_control_page(struct page *); + +int f2fs_init_crypto(void); +void f2fs_exit_crypto(void); +#else +static inline void f2fs_restore_and_release_control_page(struct page **p) { } +static inline void f2fs_restore_control_page(struct page *p) { } + +static inline int f2fs_init_crypto(void) { return 0; } +static inline void f2fs_exit_crypto(void) { } +#endif #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 138fa938..d61f74ab 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -416,6 +416,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; +#ifdef CONFIG_F2FS_FS_ENCRYPTION + fi->i_crypt_info = NULL; +#endif return &fi->vfs_inode; } From 35924ffc5c3dbbd727bc58701f1822a5b5108fad Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Apr 2015 16:23:47 -0700 Subject: [PATCH 268/321] f2fs crypto: add encryption key management facilities This patch copies from encrypt_key.c in ext4, and modifies for f2fs. Use GFP_NOFS, since _f2fs_get_encryption_info is called under f2fs_lock_op. Signed-off-by: Michael Halcrow Signed-off-by: Ildar Muslukhov Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 2 +- fs/f2fs/crypto_key.c | 206 ++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 22 +++++ fs/f2fs/f2fs_crypto.h | 3 + 4 files changed, 232 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/crypto_key.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index a79907b4..b08925d3 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -6,4 +6,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o -f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o crypto_key.o diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c new file mode 100644 index 00000000..c7d414dd --- /dev/null +++ b/fs/f2fs/crypto_key.c @@ -0,0 +1,206 @@ +/* + * linux/fs/f2fs/crypto_key.c + * + * Copied from linux/fs/f2fs/crypto_key.c + * + * Copyright (C) 2015, Google, Inc. + * + * This contains encryption key functions for f2fs + * + * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +static void derive_crypt_complete(struct crypto_async_request *req, int rc) +{ + struct f2fs_completion_result *ecr = req->data; + + if (rc == -EINPROGRESS) + return; + + ecr->res = rc; + complete(&ecr->completion); +} + +/** + * f2fs_derive_key_aes() - Derive a key using AES-128-ECB + * @deriving_key: Encryption key used for derivatio. + * @source_key: Source key to which to apply derivation. + * @derived_key: Derived key. + * + * Return: Zero on success; non-zero otherwise. + */ +static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE], + char source_key[F2FS_AES_256_XTS_KEY_SIZE], + char derived_key[F2FS_AES_256_XTS_KEY_SIZE]) +{ + int res = 0; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist src_sg, dst_sg; + struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0, + 0); + + if (IS_ERR(tfm)) { + res = PTR_ERR(tfm); + tfm = NULL; + goto out; + } + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + res = -ENOMEM; + goto out; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + derive_crypt_complete, &ecr); + res = crypto_ablkcipher_setkey(tfm, deriving_key, + F2FS_AES_128_ECB_KEY_SIZE); + if (res < 0) + goto out; + + sg_init_one(&src_sg, source_key, F2FS_AES_256_XTS_KEY_SIZE); + sg_init_one(&dst_sg, derived_key, F2FS_AES_256_XTS_KEY_SIZE); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, + F2FS_AES_256_XTS_KEY_SIZE, NULL); + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } +out: + if (req) + ablkcipher_request_free(req); + if (tfm) + crypto_free_ablkcipher(tfm); + return res; +} + +void f2fs_free_encryption_info(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *ci = fi->i_crypt_info; + + if (!ci) + return; + + if (ci->ci_keyring_key) + key_put(ci->ci_keyring_key); + crypto_free_ablkcipher(ci->ci_ctfm); + memzero_explicit(&ci->ci_raw, sizeof(ci->ci_raw)); + kfree(ci); + fi->i_crypt_info = NULL; +} + +int _f2fs_get_encryption_info(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *crypt_info; + char full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE + + (F2FS_KEY_DESCRIPTOR_SIZE * 2) + 1]; + struct key *keyring_key = NULL; + struct f2fs_encryption_key *master_key; + struct f2fs_encryption_context ctx; + struct user_key_payload *ukp; + int res; + + if (!f2fs_read_workqueue) { + res = f2fs_init_crypto(); + if (res) + return res; + } + + if (fi->i_crypt_info) { + if (!fi->i_crypt_info->ci_keyring_key || + key_validate(fi->i_crypt_info->ci_keyring_key) == 0) + return 0; + f2fs_free_encryption_info(inode); + } + + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + &ctx, sizeof(ctx), NULL); + if (res < 0) + return res; + else if (res != sizeof(ctx)) + return -EINVAL; + res = 0; + + crypt_info = kmalloc(sizeof(struct f2fs_crypt_info), GFP_NOFS); + if (!crypt_info) + return -ENOMEM; + + crypt_info->ci_flags = ctx.flags; + crypt_info->ci_data_mode = ctx.contents_encryption_mode; + crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; + crypt_info->ci_ctfm = NULL; + memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, + sizeof(crypt_info->ci_master_key)); + if (S_ISREG(inode->i_mode)) + crypt_info->ci_mode = ctx.contents_encryption_mode; + else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + crypt_info->ci_mode = ctx.filenames_encryption_mode; + else { + printk(KERN_ERR "f2fs crypto: Unsupported inode type.\n"); + BUG(); + } + crypt_info->ci_size = f2fs_encryption_key_size(crypt_info->ci_mode); + BUG_ON(!crypt_info->ci_size); + + memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX, + F2FS_KEY_DESC_PREFIX_SIZE); + sprintf(full_key_descriptor + F2FS_KEY_DESC_PREFIX_SIZE, + "%*phN", F2FS_KEY_DESCRIPTOR_SIZE, + ctx.master_key_descriptor); + full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE + + (2 * F2FS_KEY_DESCRIPTOR_SIZE)] = '\0'; + keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); + if (IS_ERR(keyring_key)) { + res = PTR_ERR(keyring_key); + keyring_key = NULL; + goto out; + } + BUG_ON(keyring_key->type != &key_type_logon); + ukp = ((struct user_key_payload *)keyring_key->payload.data); + if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { + res = -EINVAL; + goto out; + } + master_key = (struct f2fs_encryption_key *)ukp->data; + BUILD_BUG_ON(F2FS_AES_128_ECB_KEY_SIZE != + F2FS_KEY_DERIVATION_NONCE_SIZE); + BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE); + res = f2fs_derive_key_aes(ctx.nonce, master_key->raw, + crypt_info->ci_raw); +out: + if (res < 0) { + if (res == -ENOKEY) + res = 0; + kfree(crypt_info); + } else { + fi->i_crypt_info = crypt_info; + crypt_info->ci_keyring_key = keyring_key; + keyring_key = NULL; + } + if (keyring_key) + key_put(keyring_key); + return res; +} + +int f2fs_has_encryption_key(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + return (fi->i_crypt_info != NULL); +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d157bd26..ce1ee03d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1973,17 +1973,39 @@ int f2fs_decrypt(struct f2fs_crypto_ctx *, struct page *); int f2fs_decrypt_one(struct inode *, struct page *); void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); +/* crypto_key.c */ +void f2fs_free_encryption_info(struct inode *); +int _f2fs_get_encryption_info(struct inode *inode); + #ifdef CONFIG_F2FS_FS_ENCRYPTION void f2fs_restore_and_release_control_page(struct page **); void f2fs_restore_control_page(struct page *); int f2fs_init_crypto(void); void f2fs_exit_crypto(void); + +int f2fs_has_encryption_key(struct inode *); + +static inline int f2fs_get_encryption_info(struct inode *inode) +{ + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (!ci || + (ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD))))) + return _f2fs_get_encryption_info(inode); + return 0; +} #else static inline void f2fs_restore_and_release_control_page(struct page **p) { } static inline void f2fs_restore_control_page(struct page *p) { } static inline int f2fs_init_crypto(void) { return 0; } static inline void f2fs_exit_crypto(void) { } + +static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } +static inline int f2fs_get_encryption_info(struct inode *i) { return 0; } #endif #endif diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index 3323266e..6e413949 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -65,6 +65,9 @@ struct f2fs_encryption_context { #define F2FS_AES_256_XTS_KEY_SIZE 64 #define F2FS_MAX_KEY_SIZE 64 +#define F2FS_KEY_DESC_PREFIX "f2fs:" +#define F2FS_KEY_DESC_PREFIX_SIZE 5 + struct f2fs_encryption_key { __u32 mode; char raw[F2FS_MAX_KEY_SIZE]; From a911239dce266947255c41c9dc3e6a8f3503b27a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 26 Apr 2015 00:12:50 -0700 Subject: [PATCH 269/321] f2fs crypto: filename encryption facilities This patch adds filename encryption infra. Most of codes are copied from ext4 part, but changed to adjust f2fs directory structure. Signed-off-by: Uday Savagaonkar Signed-off-by: Ildar Muslukhov Signed-off-by: Michael Halcrow Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 3 +- fs/f2fs/crypto_fname.c | 490 +++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 50 +++++ 3 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/crypto_fname.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index b08925d3..396be1a3 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -6,4 +6,5 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o -f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o crypto_key.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o \ + crypto_key.o crypto_fname.o diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c new file mode 100644 index 00000000..8f3ff9b3 --- /dev/null +++ b/fs/f2fs/crypto_fname.c @@ -0,0 +1,490 @@ +/* + * linux/fs/f2fs/crypto_fname.c + * + * Copied from linux/fs/ext4/crypto.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * This contains functions for filename crypto management in f2fs + * + * Written by Uday Savagaonkar, 2014. + * + * Adjust f2fs dentry structure + * Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "f2fs_crypto.h" +#include "xattr.h" + +/** + * f2fs_dir_crypt_complete() - + */ +static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res) +{ + struct f2fs_completion_result *ecr = req->data; + + if (res == -EINPROGRESS) + return; + ecr->res = res; + complete(&ecr->completion); +} + +bool f2fs_valid_filenames_enc_mode(uint32_t mode) +{ + return (mode == F2FS_ENCRYPTION_MODE_AES_256_CTS); +} + +static unsigned max_name_len(struct inode *inode) +{ + return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize : + F2FS_NAME_LEN; +} + +/** + * f2fs_fname_encrypt() - + * + * This function encrypts the input filename, and returns the length of the + * ciphertext. Errors are returned as negative numbers. We trust the caller to + * allocate sufficient memory to oname string. + */ +static int f2fs_fname_encrypt(struct inode *inode, + const struct qstr *iname, struct f2fs_str *oname) +{ + u32 ciphertext_len; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + char iv[F2FS_CRYPTO_BLOCK_SIZE]; + struct scatterlist src_sg, dst_sg; + int padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK); + char *workbuf, buf[32], *alloc_buf = NULL; + unsigned lim = max_name_len(inode); + + if (iname->len <= 0 || iname->len > lim) + return -EIO; + + ciphertext_len = (iname->len < F2FS_CRYPTO_BLOCK_SIZE) ? + F2FS_CRYPTO_BLOCK_SIZE : iname->len; + ciphertext_len = f2fs_fname_crypto_round_up(ciphertext_len, padding); + ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len; + + if (ciphertext_len <= sizeof(buf)) { + workbuf = buf; + } else { + alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); + if (!alloc_buf) + return -ENOMEM; + workbuf = alloc_buf; + } + + /* Allocate request */ + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", __func__); + kfree(alloc_buf); + return -ENOMEM; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_dir_crypt_complete, &ecr); + + /* Copy the input */ + memcpy(workbuf, iname->name, iname->len); + if (iname->len < ciphertext_len) + memset(workbuf + iname->len, 0, ciphertext_len - iname->len); + + /* Initialize IV */ + memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE); + + /* Create encryption request */ + sg_init_one(&src_sg, workbuf, ciphertext_len); + sg_init_one(&dst_sg, oname->name, ciphertext_len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + kfree(alloc_buf); + ablkcipher_request_free(req); + if (res < 0) { + printk_ratelimited(KERN_ERR + "%s: Error (error code %d)\n", __func__, res); + } + oname->len = ciphertext_len; + return res; +} + +/* + * f2fs_fname_decrypt() + * This function decrypts the input filename, and returns + * the length of the plaintext. + * Errors are returned as negative numbers. + * We trust the caller to allocate sufficient memory to oname string. + */ +static int f2fs_fname_decrypt(struct inode *inode, + const struct f2fs_str *iname, struct f2fs_str *oname) +{ + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist src_sg, dst_sg; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + char iv[F2FS_CRYPTO_BLOCK_SIZE]; + unsigned lim = max_name_len(inode); + + if (iname->len <= 0 || iname->len > lim) + return -EIO; + + /* Allocate request */ + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", __func__); + return -ENOMEM; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_dir_crypt_complete, &ecr); + + /* Initialize IV */ + memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE); + + /* Create decryption request */ + sg_init_one(&src_sg, iname->name, iname->len); + sg_init_one(&dst_sg, oname->name, oname->len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); + res = crypto_ablkcipher_decrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + ablkcipher_request_free(req); + if (res < 0) { + printk_ratelimited(KERN_ERR + "%s: Error in f2fs_fname_decrypt (error code %d)\n", + __func__, res); + return res; + } + + oname->len = strnlen(oname->name, iname->len); + return oname->len; +} + +static const char *lookup_table = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + +/** + * f2fs_fname_encode_digest() - + * + * Encodes the input digest using characters from the set [a-zA-Z0-9_+]. + * The encoded string is roughly 4/3 times the size of the input string. + */ +static int digest_encode(const char *src, int len, char *dst) +{ + int i = 0, bits = 0, ac = 0; + char *cp = dst; + + while (i < len) { + ac += (((unsigned char) src[i]) << bits); + bits += 8; + do { + *cp++ = lookup_table[ac & 0x3f]; + ac >>= 6; + bits -= 6; + } while (bits >= 6); + i++; + } + if (bits) + *cp++ = lookup_table[ac & 0x3f]; + return cp - dst; +} + +static int digest_decode(const char *src, int len, char *dst) +{ + int i = 0, bits = 0, ac = 0; + const char *p; + char *cp = dst; + + while (i < len) { + p = strchr(lookup_table, src[i]); + if (p == NULL || src[i] == 0) + return -2; + ac += (p - lookup_table) << bits; + bits += 6; + if (bits >= 8) { + *cp++ = ac & 0xff; + ac >>= 8; + bits -= 8; + } + i++; + } + if (ac) + return -1; + return cp - dst; +} + +int f2fs_setup_fname_crypto(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *ci = fi->i_crypt_info; + struct crypto_ablkcipher *ctfm; + int res; + + /* Check if the crypto policy is set on the inode */ + res = f2fs_encrypted_inode(inode); + if (res == 0) + return 0; + + res = f2fs_get_encryption_info(inode); + if (res < 0) + return res; + ci = fi->i_crypt_info; + + if (!ci || ci->ci_ctfm) + return 0; + + if (ci->ci_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) { + printk_once(KERN_WARNING "f2fs: unsupported key mode %d\n", + ci->ci_mode); + return -ENOKEY; + } + + ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))", 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; + printk(KERN_DEBUG "%s: error (%d) allocating crypto tfm\n", + __func__, res); + return res; + } + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + + res = crypto_ablkcipher_setkey(ctfm, ci->ci_raw, ci->ci_size); + if (res) { + crypto_free_ablkcipher(ctfm); + return -EIO; + } + ci->ci_ctfm = ctfm; + return 0; +} + +/** + * f2fs_fname_crypto_round_up() - + * + * Return: The next multiple of block size + */ +u32 f2fs_fname_crypto_round_up(u32 size, u32 blksize) +{ + return ((size + blksize - 1) / blksize) * blksize; +} + +/** + * f2fs_fname_crypto_alloc_obuff() - + * + * Allocates an output buffer that is sufficient for the crypto operation + * specified by the context and the direction. + */ +int f2fs_fname_crypto_alloc_buffer(struct inode *inode, + u32 ilen, struct f2fs_str *crypto_str) +{ + unsigned int olen; + int padding = 16; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (ci) + padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK); + if (padding < F2FS_CRYPTO_BLOCK_SIZE) + padding = F2FS_CRYPTO_BLOCK_SIZE; + olen = f2fs_fname_crypto_round_up(ilen, padding); + crypto_str->len = olen; + if (olen < F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2) + olen = F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2; + /* Allocated buffer can hold one more character to null-terminate the + * string */ + crypto_str->name = kmalloc(olen + 1, GFP_NOFS); + if (!(crypto_str->name)) + return -ENOMEM; + return 0; +} + +/** + * f2fs_fname_crypto_free_buffer() - + * + * Frees the buffer allocated for crypto operation. + */ +void f2fs_fname_crypto_free_buffer(struct f2fs_str *crypto_str) +{ + if (!crypto_str) + return; + kfree(crypto_str->name); + crypto_str->name = NULL; +} + +/** + * f2fs_fname_disk_to_usr() - converts a filename from disk space to user space + */ +int f2fs_fname_disk_to_usr(struct inode *inode, + f2fs_hash_t *hash, + const struct f2fs_str *iname, + struct f2fs_str *oname) +{ + const struct qstr qname = FSTR_TO_QSTR(iname); + char buf[24]; + int ret; + + if (is_dot_dotdot(&qname)) { + oname->name[0] = '.'; + oname->name[iname->len - 1] = '.'; + oname->len = iname->len; + return oname->len; + } + + if (F2FS_I(inode)->i_crypt_info) + return f2fs_fname_decrypt(inode, iname, oname); + + if (iname->len <= F2FS_FNAME_CRYPTO_DIGEST_SIZE) { + ret = digest_encode(iname->name, iname->len, oname->name); + oname->len = ret; + return ret; + } + if (hash) { + memcpy(buf, hash, 4); + memset(buf + 4, 0, 4); + } else + memset(buf, 0, 8); + memcpy(buf + 8, iname->name + iname->len - 16, 16); + oname->name[0] = '_'; + ret = digest_encode(buf, 24, oname->name + 1); + oname->len = ret + 1; + return ret + 1; +} + +/** + * f2fs_fname_usr_to_disk() - converts a filename from user space to disk space + */ +int f2fs_fname_usr_to_disk(struct inode *inode, + const struct qstr *iname, + struct f2fs_str *oname) +{ + int res; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (is_dot_dotdot(iname)) { + oname->name[0] = '.'; + oname->name[iname->len - 1] = '.'; + oname->len = iname->len; + return oname->len; + } + + if (ci) { + res = f2fs_fname_encrypt(inode, iname, oname); + return res; + } + /* Without a proper key, a user is not allowed to modify the filenames + * in a directory. Consequently, a user space name cannot be mapped to + * a disk-space name */ + return -EACCES; +} + +int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct f2fs_filename *fname) +{ + struct f2fs_crypt_info *ci; + int ret = 0, bigname = 0; + + memset(fname, 0, sizeof(struct f2fs_filename)); + fname->usr_fname = iname; + + if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) { + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + goto out; + } + ret = f2fs_setup_fname_crypto(dir); + if (ret) + return ret; + ci = F2FS_I(dir)->i_crypt_info; + if (ci) { + ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len, + &fname->crypto_buf); + if (ret < 0) + goto out; + ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf); + if (ret < 0) + goto out; + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; + ret = 0; + goto out; + } + if (!lookup) { + ret = -EACCES; + goto out; + } + + /* We don't have the key and we are doing a lookup; decode the + * user-supplied name + */ + if (iname->name[0] == '_') + bigname = 1; + if ((bigname && (iname->len != 33)) || + (!bigname && (iname->len > 43))) { + ret = -ENOENT; + } + fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); + if (fname->crypto_buf.name == NULL) { + ret = -ENOMEM; + goto out; + } + ret = digest_decode(iname->name + bigname, iname->len - bigname, + fname->crypto_buf.name); + if (ret < 0) { + ret = -ENOENT; + goto out; + } + fname->crypto_buf.len = ret; + if (bigname) { + memcpy(&fname->hash, fname->crypto_buf.name, 4); + } else { + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; + } + ret = 0; +out: + return ret; +} + +void f2fs_fname_free_filename(struct f2fs_filename *fname) +{ + kfree(fname->crypto_buf.name); + fname->crypto_buf.name = NULL; + fname->usr_fname = NULL; + fname->disk_name.name = NULL; +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ce1ee03d..9c358936 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -256,6 +256,25 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, * For INODE and NODE manager */ /* for directory operations */ +struct f2fs_str { + unsigned char *name; + u32 len; +}; + +struct f2fs_filename { + const struct qstr *usr_fname; + struct f2fs_str disk_name; + f2fs_hash_t hash; +#ifdef CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_str crypto_buf; +#endif +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + struct f2fs_dentry_ptr { const void *bitmap; struct f2fs_dir_entry *dentry; @@ -1977,6 +1996,15 @@ void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); void f2fs_free_encryption_info(struct inode *); int _f2fs_get_encryption_info(struct inode *inode); +/* crypto_fname.c */ +bool f2fs_valid_filenames_enc_mode(uint32_t); +u32 f2fs_fname_crypto_round_up(u32, u32); +int f2fs_fname_crypto_alloc_buffer(struct inode *, u32, struct f2fs_str *); +int f2fs_fname_disk_to_usr(struct inode *, f2fs_hash_t *, + const struct f2fs_str *, struct f2fs_str *); +int f2fs_fname_usr_to_disk(struct inode *, const struct qstr *, + struct f2fs_str *); + #ifdef CONFIG_F2FS_FS_ENCRYPTION void f2fs_restore_and_release_control_page(struct page **); void f2fs_restore_control_page(struct page *); @@ -1998,6 +2026,12 @@ static inline int f2fs_get_encryption_info(struct inode *inode) return _f2fs_get_encryption_info(inode); return 0; } + +int f2fs_setup_fname_crypto(struct inode *); +void f2fs_fname_crypto_free_buffer(struct f2fs_str *); +int f2fs_fname_setup_filename(struct inode *, const struct qstr *, + int lookup, struct f2fs_filename *); +void f2fs_fname_free_filename(struct f2fs_filename *); #else static inline void f2fs_restore_and_release_control_page(struct page **p) { } static inline void f2fs_restore_control_page(struct page *p) { } @@ -2007,5 +2041,21 @@ static inline void f2fs_exit_crypto(void) { } static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } static inline int f2fs_get_encryption_info(struct inode *i) { return 0; } + +static inline int f2fs_setup_fname_crypto(struct inode *i) { return 0; } +static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { } + +static inline int f2fs_fname_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct f2fs_filename *fname) +{ + memset(fname, 0, sizeof(struct f2fs_filename)); + fname->usr_fname = iname; + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; +} + +static inline void f2fs_fname_free_filename(struct f2fs_filename *fname) { } #endif #endif From efa5d316251b7fdc25a3d3e05d4a1c6c4fb880b3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Apr 2015 20:39:58 -0700 Subject: [PATCH 270/321] f2fs crypto: activate encryption support for fs APIs This patch activates the following APIs for encryption support. The rules quoted by ext4 are: - An unencrypted directory may contain encrypted or unencrypted files or directories. - All files or directories in a directory must be protected using the same key as their containing directory. - Encrypted inode for regular file should not have inline_data. - Encrypted symlink and directory may have inline_data and inline_dentry. This patch activates the following APIs. 1. f2fs_link : validate context 2. f2fs_lookup : '' 3. f2fs_rename : '' 4. f2fs_create/f2fs_mkdir : inherit its dir's context 5. f2fs_direct_IO : do buffered io for regular files 6. f2fs_open : check encryption info 7. f2fs_file_mmap : '' 8. f2fs_setattr : '' 9. f2fs_file_write_iter : '' (Called by sys_io_submit) 10. f2fs_fallocate : do not support fcollapse 11. f2fs_evict_inode : free_encryption_info Signed-off-by: Michael Halcrow Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/data.c fs/f2fs/file.c Change-Id: Ida94d979d559faeb8609992e2d520b58eb1f79d2 --- fs/f2fs/data.c | 3 +++ fs/f2fs/dir.c | 6 ++++++ fs/f2fs/f2fs.h | 11 +++++++++++ fs/f2fs/file.c | 27 ++++++++++++++++++++++++++- fs/f2fs/inline.c | 3 +++ fs/f2fs/inode.c | 4 ++++ fs/f2fs/namei.c | 38 ++++++++++++++++++++++++++++++++------ 7 files changed, 85 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e14762c7..0b81ab31 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1985,6 +1985,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + return 0; + if (check_direct_IO(inode, rw, iov, offset, nr_segs)) return 0; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2f58e7c5..582a02fd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -405,6 +405,12 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, err = f2fs_init_security(inode, dir, name, page); if (err) goto put_error; + + if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { + err = f2fs_inherit_context(dir, inode, page); + if (err) + goto put_error; + } } else { page = get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9c358936..57474a59 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1974,6 +1974,17 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb) #endif } +static inline bool f2fs_may_encrypt(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + mode_t mode = inode->i_mode; + + return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); +#else + return 0; +#endif +} + /* crypto_policy.c */ int f2fs_is_child_context_consistent_with_parent(struct inode *, struct inode *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8fe79429..30dd669f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -426,6 +426,12 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); + if (f2fs_encrypted_inode(inode)) { + int err = f2fs_get_encryption_info(inode); + if (err) + return 0; + } + /* we don't need to use inline_data strictly */ if (f2fs_has_inline_data(inode)) { int err = f2fs_convert_inline_inode(inode); @@ -438,6 +444,18 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +static int f2fs_file_open(struct inode *inode, struct file *filp) +{ + int ret = generic_file_open(inode, filp); + + if (!ret && f2fs_encrypted_inode(inode)) { + ret = f2fs_get_encryption_info(inode); + if (ret) + ret = -EACCES; + } + return ret; +} + int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; @@ -645,6 +663,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { + if (f2fs_encrypted_inode(inode) && + f2fs_get_encryption_info(inode)) + return -EACCES; + if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); @@ -1081,6 +1103,9 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; + if (f2fs_encrypted_inode(inode) && (mode & FALLOC_FL_COLLAPSE_RANGE)) + return -EOPNOTSUPP; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; @@ -1509,7 +1534,7 @@ const struct file_operations f2fs_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, - .open = generic_file_open, + .open = f2fs_file_open, .release = f2fs_release_file, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index aa9321c4..11896429 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -27,6 +27,9 @@ bool f2fs_may_inline_data(struct inode *inode) if (i_size_read(inode) > MAX_INLINE_DATA) return false; + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + return false; + return true; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 42d2bf20..107bd6a7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -358,6 +358,10 @@ void f2fs_evict_inode(struct inode *inode) if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); out_clear: +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_I(inode)->i_crypt_info) + f2fs_free_encryption_info(inode); +#endif clear_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d28e1ff9..9f97176a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -57,6 +57,10 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto out; } + /* If the directory encrypted, then we should encrypt the inode. */ + if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) + f2fs_set_encrypted_inode(inode); + if (f2fs_may_inline_data(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) @@ -158,6 +162,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; + if (f2fs_encrypted_inode(dir) && + !f2fs_is_child_context_consistent_with_parent(dir, inode)) + return -EPERM; + f2fs_balance_fs(sbi); inode->i_ctime = CURRENT_TIME; @@ -236,6 +244,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_dir_entry *de; struct page *page; nid_t ino; + int err = 0; if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -252,16 +261,26 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return ERR_CAST(inode); - if (f2fs_has_inline_dots(inode)) { - int err; + if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode) && + !f2fs_is_child_context_consistent_with_parent(dir, inode)) { + iput(inode); + f2fs_msg(inode->i_sb, KERN_WARNING, + "Inconsistent encryption contexts: %lu/%lu\n", + (unsigned long)dir->i_ino, + (unsigned long)inode->i_ino); + return ERR_PTR(-EPERM); + } + if (f2fs_has_inline_dots(inode)) { err = __recover_dot_dentries(inode, dir->i_ino); - if (err) { - iget_failed(inode); - return ERR_PTR(err); - } + if (err) + goto err_out; } return d_splice_alias(inode, dentry); + +err_out: + iget_failed(inode); + return ERR_PTR(err); } static int f2fs_unlink(struct inode *dir, struct dentry *dentry) @@ -463,6 +482,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *new_entry; int err = -ENOENT; + if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && + !f2fs_is_child_context_consistent_with_parent(new_dir, + old_inode)) { + err = -EPERM; + goto out; + } + f2fs_balance_fs(sbi); old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page, 0); From fbb2f24a2d80ee698b4328aa82e534fa58aac555 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Apr 2015 12:04:33 -0700 Subject: [PATCH 271/321] f2fs crypto: add encryption support in read/write paths This patch adds encryption support in read and write paths. Note that, in f2fs, we need to consider cleaning operation. In cleaning procedure, we must avoid encrypting and decrypting written blocks. So, this patch implements move_encrypted_block(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 +- fs/f2fs/data.c | 93 +++++++++++++++++++++++++++++++++----------- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 79 ++++++++++++++++++++++++++++++++++++- fs/f2fs/inline.c | 1 + fs/f2fs/node.c | 2 + fs/f2fs/segment.c | 24 ++++++++++-- 8 files changed, 177 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fd7a21df..a43c17f8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -56,6 +56,7 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO, .blk_addr = index, + .encrypted_page = NULL, }; repeat: page = grab_cache_page(mapping, index); @@ -123,7 +124,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type struct f2fs_io_info fio = { .sbi = sbi, .type = META, - .rw = READ_SYNC | REQ_META | REQ_PRIO + .rw = READ_SYNC | REQ_META | REQ_PRIO, + .encrypted_page = NULL, }; for (; nrpages-- > 0; blkno++) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0b81ab31..06f65b20 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -58,6 +58,15 @@ static void mpage_end_io(struct bio *bio, int err) struct bio_vec *bv; int i; + if (f2fs_bio_encrypted(bio)) { + if (err) { + f2fs_release_crypto_ctx(bio->bi_private); + } else { + f2fs_end_io_crypto_work(bio->bi_private, bio); + return; + } + } + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; @@ -82,6 +91,8 @@ static void f2fs_write_end_io(struct bio *bio, int err) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; + f2fs_restore_and_release_control_page(&page); + if (unlikely(err)) { set_page_dirty(page); set_bit(AS_EIO, &page->mapping->flags); @@ -162,7 +173,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct page *page = fio->page; + struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -186,6 +197,7 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; bool is_read = is_read_io(fio->rw); + struct page *bio_page; io = is_read ? &sbi->read_io : &sbi->write_io[btype]; @@ -207,7 +219,9 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) io->fio = *fio; } - if (bio_add_page(io->bio, fio->page, PAGE_CACHE_SIZE, 0) < + bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + + if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { __submit_merged_bio(io); goto alloc_new; @@ -929,8 +943,12 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw) .sbi = F2FS_I_SB(inode), .type = DATA, .rw = rw, + .encrypted_page = NULL, }; + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + return read_mapping_page(mapping, index, NULL); + page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -1067,26 +1085,14 @@ struct page *get_new_data_page(struct inode *inode, zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { - struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), - .type = DATA, - .rw = READ_SYNC, - .blk_addr = dn.data_blkaddr, - .page = page, - }; - err = f2fs_submit_page_bio(&fio); - if (err) - return ERR_PTR(err); + f2fs_put_page(page, 1); - lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) goto repeat; - } + + /* wait for read completion */ + lock_page(page); } got_it: if (new_i_size && @@ -1549,14 +1555,38 @@ static int f2fs_mpage_readpages(struct address_space *mapping, bio = NULL; } if (bio == NULL) { + struct f2fs_crypto_ctx *ctx = NULL; + + if (f2fs_encrypted_inode(inode) && + S_ISREG(inode->i_mode)) { + struct page *cpage; + + ctx = f2fs_get_crypto_ctx(inode); + if (IS_ERR(ctx)) + goto set_error_page; + + /* wait the page to be moved by cleaning */ + cpage = find_lock_page( + META_MAPPING(F2FS_I_SB(inode)), + block_nr); + if (cpage) { + f2fs_wait_on_page_writeback(cpage, + DATA); + f2fs_put_page(cpage, 1); + } + } + bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, bio_get_nr_vecs(bdev))); - if (!bio) + if (!bio) { + if (ctx) + f2fs_release_crypto_ctx(ctx); goto set_error_page; + } bio->bi_bdev = bdev; bio->bi_sector = SECTOR_FROM_BLOCK(block_nr); bio->bi_end_io = mpage_end_io; - bio->bi_private = NULL; + bio->bi_private = ctx; } if (bio_add_page(bio, page, blocksize, 0) < blocksize) @@ -1633,6 +1663,14 @@ int do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; } + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + fio->encrypted_page = f2fs_encrypt(inode, fio->page); + if (IS_ERR(fio->encrypted_page)) { + err = PTR_ERR(fio->encrypted_page); + goto out_writepage; + } + } + set_page_writeback(page); /* @@ -1675,6 +1713,7 @@ static int f2fs_write_data_page(struct page *page, .type = DATA, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, .page = page, + .encrypted_page = NULL, }; trace_f2fs_writepage(page, DATA); @@ -1898,6 +1937,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, .rw = READ_SYNC, .blk_addr = dn.data_blkaddr, .page = page, + .encrypted_page = NULL, }; err = f2fs_submit_page_bio(&fio); if (err) @@ -1913,6 +1953,15 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_put_page(page, 1); goto repeat; } + + /* avoid symlink page */ + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + err = f2fs_decrypt_one(inode, page); + if (err) { + f2fs_put_page(page, 1); + goto fail; + } + } } out: SetPageUptodate(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 57474a59..fae7247a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -655,6 +655,7 @@ struct f2fs_io_info { int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ block_t blk_addr; /* block address to be written */ struct page *page; /* page to be written */ + struct page *encrypted_page; /* encrypted page */ }; #define is_read_io(rw) (((rw) & 1) == READ) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 30dd669f..f35be662 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -522,7 +522,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, truncate_out: f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); - if (!cache_only) + if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) set_page_dirty(page); f2fs_put_page(page, 1); return 0; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2e2afebd..43354cb3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -518,6 +518,72 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return 1; } +static void move_encrypted_block(struct inode *inode, block_t bidx) +{ + struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), + .type = DATA, + .rw = READ_SYNC, + .encrypted_page = NULL, + }; + struct dnode_of_data dn; + struct f2fs_summary sum; + struct node_info ni; + struct page *page; + int err; + + /* do not read out */ + page = grab_cache_page(inode->i_mapping, bidx); + if (!page) + return; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); + if (err) + goto out; + + if (unlikely(dn.data_blkaddr == NULL_ADDR)) + goto put_out; + + get_node_info(fio.sbi, dn.nid, &ni); + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); + + /* read page */ + fio.page = page; + fio.blk_addr = dn.data_blkaddr; + + fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr); + if (!fio.encrypted_page) + goto put_out; + + f2fs_submit_page_bio(&fio); + + /* allocate block address */ + f2fs_wait_on_page_writeback(dn.node_page, NODE); + + allocate_data_block(fio.sbi, NULL, fio.blk_addr, + &fio.blk_addr, &sum, CURSEG_COLD_DATA); + dn.data_blkaddr = fio.blk_addr; + + /* write page */ + lock_page(fio.encrypted_page); + set_page_writeback(fio.encrypted_page); + fio.rw = WRITE_SYNC; + f2fs_submit_page_mbio(&fio); + + set_data_blkaddr(&dn); + f2fs_update_extent_cache(&dn); + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + + f2fs_put_page(fio.encrypted_page, 1); +put_out: + f2fs_put_dnode(&dn); +out: + f2fs_put_page(page, 1); +} + static void move_data_page(struct inode *inode, block_t bidx, int gc_type) { struct page *page; @@ -537,6 +603,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .type = DATA, .rw = WRITE_SYNC, .page = page, + .encrypted_page = NULL, }; f2fs_wait_on_page_writeback(page, DATA); @@ -606,6 +673,13 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (IS_ERR(inode) || is_bad_inode(inode)) continue; + /* if encrypted inode, let's go phase 3 */ + if (f2fs_encrypted_inode(inode) && + S_ISREG(inode->i_mode)) { + add_gc_inode(gc_list, inode); + continue; + } + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); data_page = get_read_data_page(inode, start_bidx + ofs_in_node, READA); @@ -624,7 +698,10 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (inode) { start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)) + ofs_in_node; - move_data_page(inode, start_bidx, gc_type); + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + move_encrypted_block(inode, start_bidx); + else + move_data_page(inode, start_bidx, gc_type); stat_inc_data_blk_count(sbi, 1, gc_type); } } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 11896429..a908535a 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -113,6 +113,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, .page = page, + .encrypted_page = NULL, }; int dirty, err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bb74f58e..8abeb450 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1003,6 +1003,7 @@ static int read_node_page(struct page *page, int rw) .type = NODE, .rw = rw, .page = page, + .encrypted_page = NULL, }; get_node_info(sbi, page->index, &ni); @@ -1301,6 +1302,7 @@ static int f2fs_write_node_page(struct page *page, .type = NODE, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, .page = page, + .encrypted_page = NULL, }; trace_f2fs_writepage(page, NODE); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fa850154..08e91d8d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -278,6 +278,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) .sbi = sbi, .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, + .encrypted_page = NULL, }; /* @@ -1290,6 +1291,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .rw = WRITE_SYNC | REQ_META | REQ_PRIO, .blk_addr = page->index, .page = page, + .encrypted_page = NULL, }; set_page_writeback(page); @@ -1389,20 +1391,34 @@ static inline bool is_merged_page(struct f2fs_sb_info *sbi, enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = &sbi->write_io[btype]; struct bio_vec *bvec; + struct page *target; int i; down_read(&io->io_rwsem); - if (!io->bio) - goto out; + if (!io->bio) { + up_read(&io->io_rwsem); + return false; + } bio_for_each_segment_all(bvec, io->bio, i) { - if (page == bvec->bv_page) { + + if (bvec->bv_page->mapping) { + target = bvec->bv_page; + } else { + struct f2fs_crypto_ctx *ctx; + + /* encrypted page */ + ctx = (struct f2fs_crypto_ctx *)page_private( + bvec->bv_page); + target = ctx->control_page; + } + + if (page == target) { up_read(&io->io_rwsem); return true; } } -out: up_read(&io->io_rwsem); return false; } From 2fc42c73be1eadc6938400f1027b543712736283 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Apr 2015 14:51:02 -0700 Subject: [PATCH 272/321] f2fs crypto: add filename encryption for f2fs_add_link This patch adds filename encryption support for f2fs_add_link. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 582a02fd..47affe6a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -522,24 +522,33 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, unsigned long bidx, block; f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; - size_t namelen = name->len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; - int slots = GET_DENTRY_SLOTS(namelen); struct page *page = NULL; - int err = 0; + struct f2fs_filename fname; + struct qstr new_name; + int slots, err; + + err = f2fs_fname_setup_filename(dir, name, 0, &fname); + if (err) + return err; + + new_name.name = fname_name(&fname); + new_name.len = fname_len(&fname); if (f2fs_has_inline_dentry(dir)) { - err = f2fs_add_inline_entry(dir, name, inode, ino, mode); + err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); if (!err || err != -EAGAIN) - return err; + goto out; else err = 0; } - dentry_hash = f2fs_dentry_hash(name); level = 0; + slots = GET_DENTRY_SLOTS(new_name.len); + dentry_hash = f2fs_dentry_hash(&new_name); + current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { level = F2FS_I(dir)->clevel; @@ -547,8 +556,10 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } start: - if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) - return -ENOSPC; + if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) { + err = -ENOSPC; + goto out; + } /* Increase the depth, if required */ if (level == current_depth) @@ -562,8 +573,10 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = get_new_data_page(dir, NULL, block, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); + if (IS_ERR(dentry_page)) { + err = PTR_ERR(dentry_page); + goto out; + } dentry_blk = kmap(dentry_page); bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, @@ -583,7 +596,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, NULL); + page = init_inode_metadata(inode, dir, &new_name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -591,7 +604,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } make_dentry_ptr(&d, (void *)dentry_blk, 1); - f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -613,6 +626,8 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); +out: + f2fs_fname_free_filename(&fname); return err; } From 38e62e3c745773a122cbd1ce676251ccb6ecb6e1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Apr 2015 16:26:24 -0700 Subject: [PATCH 273/321] f2fs crypto: add filename encryption for f2fs_readdir This patch implements filename encryption support for f2fs_readdir. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/dir.c fs/f2fs/f2fs.h fs/f2fs/inline.c Change-Id: I251e7b074b8d62ab2d2bd8de9e8351be032343a5 --- fs/f2fs/dir.c | 58 ++++++++++++++++++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 11 +++++---- fs/f2fs/inline.c | 13 ++++++----- 3 files changed, 60 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 47affe6a..61a28c24 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -104,7 +104,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - make_dentry_ptr(&d, (void *)dentry_blk, 1); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); de = find_target_dentry(name, max_slots, &d, flags); if (de) @@ -371,7 +371,7 @@ static int make_empty_dir(struct inode *inode, dentry_blk = kmap_atomic(dentry_page); - make_dentry_ptr(&d, (void *)dentry_blk, 1); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); do_make_empty_dir(inode, parent, &d); kunmap_atomic(dentry_blk); @@ -603,7 +603,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } } - make_dentry_ptr(&d, (void *)dentry_blk, 1); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -765,11 +765,13 @@ bool f2fs_empty_dir(struct inode *dir) } bool f2fs_fill_dentries(struct file *file, void *dirent, filldir_t filldir, - struct f2fs_dentry_ptr *d, unsigned int n, unsigned int bit_pos) + struct f2fs_dentry_ptr *d, unsigned int n, unsigned int bit_pos, + struct f2fs_str *fstr) { unsigned int start_bit_pos = bit_pos; unsigned char d_type; struct f2fs_dir_entry *de = NULL; + struct f2fs_str de_name = FSTR_INIT(NULL, 0); unsigned char *types = f2fs_filetype_table; int over; @@ -780,11 +782,27 @@ bool f2fs_fill_dentries(struct file *file, void *dirent, filldir_t filldir, break; de = &d->dentry[bit_pos]; + if (types && de->file_type < F2FS_FT_MAX) d_type = types[de->file_type]; - over = filldir(dirent, d->filename[bit_pos], - le16_to_cpu(de->name_len), + /* encrypted case */ + de_name.name = d->filename[bit_pos]; + de_name.len = le16_to_cpu(de->name_len); + + if (f2fs_encrypted_inode(d->inode)) { + int save_len = fstr->len; + int ret; + + ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code, + &de_name, fstr); + de_name = *fstr; + fstr->len = save_len; + if (ret < 0) + return true; + } + + over = filldir(dirent, de_name.name, de_name.len, (n * d->max) + bit_pos, le32_to_cpu(de->ino), d_type); if (over) { @@ -807,10 +825,25 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; struct f2fs_dentry_ptr d; + struct f2fs_str fstr = FSTR_INIT(NULL, 0); unsigned int n = 0; + int err = 0; - if (f2fs_has_inline_dentry(inode)) - return f2fs_read_inline_dir(file, dirent, filldir); + err = f2fs_setup_fname_crypto(inode); + if (err) + return err; + + if (f2fs_encrypted_inode(inode)) { + err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN, + &fstr); + if (err < 0) + return err; + } + + if (f2fs_has_inline_dentry(inode)) { + err = f2fs_read_inline_dir(file, dirent, filldir, &fstr); + goto out; + } bit_pos = (pos % NR_DENTRY_IN_BLOCK); n = (pos / NR_DENTRY_IN_BLOCK); @@ -827,9 +860,9 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) dentry_blk = kmap(dentry_page); - make_dentry_ptr(&d, (void *)dentry_blk, 1); + make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); - if (f2fs_fill_dentries(file, dirent, filldir, &d, n, bit_pos)) + if (f2fs_fill_dentries(file, dirent, filldir, &d, n, bit_pos, &fstr)) goto stop; bit_pos = 0; @@ -843,8 +876,9 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } - - return 0; +out: + f2fs_fname_crypto_free_buffer(&fstr); + return err; } const struct file_operations f2fs_dir_operations = { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fae7247a..37927a6f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -276,15 +276,18 @@ struct f2fs_filename { #define fname_len(p) ((p)->disk_name.len) struct f2fs_dentry_ptr { + struct inode *inode; const void *bitmap; struct f2fs_dir_entry *dentry; __u8 (*filename)[F2FS_SLOT_LEN]; int max; }; -static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d, - void *src, int type) +static inline void make_dentry_ptr(struct inode *inode, + struct f2fs_dentry_ptr *d, void *src, int type) { + d->inode = inode; + if (type == 1) { struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; d->max = NR_DENTRY_IN_BLOCK; @@ -1583,7 +1586,7 @@ void set_de_type(struct f2fs_dir_entry *, umode_t); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, struct f2fs_dentry_ptr *, unsigned int); bool f2fs_fill_dentries(struct file *, void *, filldir_t, - struct f2fs_dentry_ptr *, unsigned int, unsigned int); + struct f2fs_dentry_ptr *, unsigned int, unsigned int, struct f2fs_str *); void do_make_empty_dir(struct inode *, struct inode *, struct f2fs_dentry_ptr *); struct page *init_inode_metadata(struct inode *, struct inode *, @@ -1936,7 +1939,7 @@ int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); -int f2fs_read_inline_dir(struct file *, void *, filldir_t); +int f2fs_read_inline_dir(struct file *, void *, filldir_t, struct f2fs_str *); /* * crypto support diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a908535a..ddee1629 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -299,7 +299,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(ipage); - make_dentry_ptr(&d, (void *)inline_dentry, 2); + make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); de = find_target_dentry(name, NULL, &d, flags); unlock_page(ipage); @@ -343,7 +343,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, dentry_blk = inline_data_addr(ipage); - make_dentry_ptr(&d, (void *)dentry_blk, 2); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); @@ -447,7 +447,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, f2fs_wait_on_page_writeback(ipage, NODE); name_hash = f2fs_dentry_hash(name); - make_dentry_ptr(&d, (void *)dentry_blk, 2); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); set_page_dirty(ipage); @@ -524,7 +524,8 @@ bool f2fs_empty_inline_dir(struct inode *dir) return true; } -int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir) +int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir, + struct f2fs_str *fstr) { unsigned long pos = file->f_pos; unsigned int bit_pos = 0; @@ -544,9 +545,9 @@ int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir) inline_dentry = inline_data_addr(ipage); - make_dentry_ptr(&d, (void *)inline_dentry, 2); + make_dentry_ptr(inode, &d, (void *)inline_dentry, 2); - if (!f2fs_fill_dentries(file, dirent, filldir, &d, 0, bit_pos)) + if (!f2fs_fill_dentries(file, dirent, filldir, &d, 0, bit_pos, fstr)) file->f_pos = NR_INLINE_DENTRY; f2fs_put_page(ipage, 1); From c6895da805afe208f2066a48f6eff4a0b17a0dd2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Apr 2015 17:12:39 -0700 Subject: [PATCH 274/321] f2fs crypto: add filename encryption for f2fs_lookup This patch implements filename encryption support for f2fs_lookup. Note that, f2fs_find_entry should be outside of f2fs_(un)lock_op(). Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/dir.c fs/f2fs/f2fs.h fs/f2fs/inline.c Change-Id: I7574f0c4b50fbd849d564fa644f8774f6563f2a9 --- fs/f2fs/dir.c | 87 ++++++++++++++++++++++++++---------------------- fs/f2fs/f2fs.h | 9 ++--- fs/f2fs/inline.c | 11 +++--- 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 61a28c24..1081247f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -81,20 +81,10 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -static bool early_match_name(size_t namelen, f2fs_hash_t namehash, - struct f2fs_dir_entry *de) -{ - if (le16_to_cpu(de->name_len) != namelen) - return false; - - if (de->hash_code != namehash) - return false; - - return true; -} - static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct qstr *name, int *max_slots, + struct f2fs_filename *fname, + f2fs_hash_t namehash, + int *max_slots, struct page **res_page, unsigned int flags) { @@ -105,8 +95,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); - de = find_target_dentry(name, max_slots, &d, flags); - + de = find_target_dentry(fname, namehash, max_slots, &d, flags); if (de) *res_page = dentry_page; else @@ -120,13 +109,15 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, return de; } -struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, +struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname, + f2fs_hash_t namehash, int *max_slots, struct f2fs_dentry_ptr *d, unsigned int flags) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; - f2fs_hash_t namehash = f2fs_dentry_hash(name); int max_len = 0; + struct f2fs_str de_name = FSTR_INIT(NULL, 0); + struct f2fs_str *name = &fname->disk_name; if (max_slots) *max_slots = 0; @@ -138,13 +129,21 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, } de = &d->dentry[bit_pos]; - if (flags & LOOKUP_NOCASE) { - if ((le16_to_cpu(de->name_len) == name->len) && - !strncasecmp(d->filename[bit_pos], - name->name, name->len)) + + /* encrypted case */ + de_name.name = d->filename[bit_pos]; + de_name.len = le16_to_cpu(de->name_len); + + /* show encrypted name */ + if (fname->hash) { + if (de->hash_code == fname->hash) goto found; - } else if (early_match_name(name->len, namehash, de) && - !memcmp(d->filename[bit_pos], name->name, name->len)) { + } else if (de_name.len == name->len && + de->hash_code == namehash && + !memcmp(de_name.name, name->name, name->len)) { + goto found; + } else if (flags & LOOKUP_NOCASE && de_name.len == name->len && + !strncasecmp(de_name.name, name->name, name->len)) { goto found; } @@ -167,17 +166,22 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, } static struct f2fs_dir_entry *find_in_level(struct inode *dir, - unsigned int level, struct qstr *name, - f2fs_hash_t namehash, struct page **res_page, - unsigned int flags) + unsigned int level, + struct f2fs_filename *fname, + struct page **res_page, + unsigned int flags) { - int s = GET_DENTRY_SLOTS(name->len); + struct qstr name = FSTR_TO_QSTR(&fname->disk_name); + int s = GET_DENTRY_SLOTS(name.len); unsigned int nbucket, nblock; unsigned int bidx, end_block; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; int max_slots; + f2fs_hash_t namehash; + + namehash = f2fs_dentry_hash(&name); f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -196,8 +200,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, continue; } - de = find_in_block(dentry_page, name, &max_slots, - res_page, flags); + de = find_in_block(dentry_page, fname, namehash, &max_slots, + res_page, flags); if (de) break; @@ -225,31 +229,34 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct qstr *child, { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; - f2fs_hash_t name_hash; unsigned int max_depth; unsigned int level; + struct f2fs_filename fname; + int err; *res_page = NULL; - if (f2fs_has_inline_dentry(dir)) - return find_in_inline_dir(dir, child, res_page, flags); + err = f2fs_fname_setup_filename(dir, child, 1, &fname); + if (err) + return NULL; + + if (f2fs_has_inline_dentry(dir)) { + de = find_in_inline_dir(dir, &fname, res_page, flags); + goto out; + } if (npages == 0) - return NULL; + goto out; - name_hash = f2fs_dentry_hash(child); max_depth = F2FS_I(dir)->i_current_depth; for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, child, name_hash, - res_page, flags); + de = find_in_level(dir, level, &fname, res_page, flags); if (de) break; } - if (!de && F2FS_I(dir)->chash != name_hash) { - F2FS_I(dir)->chash = name_hash; - F2FS_I(dir)->clevel = level - 1; - } +out: + f2fs_fname_free_filename(&fname); return de; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 37927a6f..e861bf6a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1583,8 +1583,9 @@ struct dentry *f2fs_get_parent(struct dentry *child); */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; void set_de_type(struct f2fs_dir_entry *, umode_t); -struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, - struct f2fs_dentry_ptr *, unsigned int); +struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *, + f2fs_hash_t, int *, struct f2fs_dentry_ptr *, + unsigned int); bool f2fs_fill_dentries(struct file *, void *, filldir_t, struct f2fs_dentry_ptr *, unsigned int, unsigned int, struct f2fs_str *); void do_make_empty_dir(struct inode *, struct inode *, @@ -1930,8 +1931,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); int f2fs_convert_inline_inode(struct inode *); int f2fs_write_inline_data(struct inode *, struct page *); bool recover_inline_data(struct inode *, struct page *); -struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, - struct page **, unsigned int); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *, + struct f2fs_filename *, struct page **, unsigned int); struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index ddee1629..1661c9c2 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -284,24 +284,27 @@ bool recover_inline_data(struct inode *inode, struct page *npage) } struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, - struct qstr *name, struct page **res_page, - unsigned int flags) + struct f2fs_filename *fname, struct page **res_page, + unsigned int flags) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_inline_dentry *inline_dentry; + struct qstr name = FSTR_TO_QSTR(&fname->disk_name); struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; struct page *ipage; + f2fs_hash_t namehash; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return NULL; + namehash = f2fs_dentry_hash(&name); + inline_dentry = inline_data_addr(ipage); make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); - de = find_target_dentry(name, NULL, &d, flags); - + de = find_target_dentry(fname, namehash, NULL, &d, flags); unlock_page(ipage); if (de) *res_page = ipage; From 3ae44c9da4486f17ab3e1b1b8ca6d975cc485e38 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Apr 2015 17:02:18 -0700 Subject: [PATCH 275/321] f2fs crypto: add filename encryption for roll-forward recovery This patch adds a bit flag to indicate whether or not i_name in the inode is encrypted. If this name is encrypted, we can't do recover_dentry during roll-forward. So, f2fs_sync_file() needs to do checkpoint, if this will be needed in future. Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/namei.c Change-Id: I053c1756d2562f88b8dbc28cd710f6539d9ac4b2 --- fs/f2fs/dir.c | 8 +++++++- fs/f2fs/f2fs.h | 5 ++++- fs/f2fs/file.c | 4 +++- fs/f2fs/namei.c | 5 ++++- fs/f2fs/recovery.c | 13 ++++++++++++- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1081247f..5a1f1f8b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -325,10 +325,14 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -int update_dent_inode(struct inode *inode, const struct qstr *name) +int update_dent_inode(struct inode *inode, struct inode *to, + const struct qstr *name) { struct page *page; + if (file_enc_name(to)) + return 0; + page = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(page)) return PTR_ERR(page); @@ -608,6 +612,8 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, err = PTR_ERR(page); goto fail; } + if (f2fs_encrypted_inode(dir)) + file_set_enc_name(inode); } make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e861bf6a..abc1334c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -377,6 +377,7 @@ struct f2fs_map_blocks { #define FADVISE_COLD_BIT 0x01 #define FADVISE_LOST_PINO_BIT 0x02 #define FADVISE_ENCRYPT_BIT 0x04 +#define FADVISE_ENC_NAME_BIT 0x08 #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -387,6 +388,8 @@ struct f2fs_map_blocks { #define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT) #define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT) #define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) +#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT) +#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) /* Encryption algorithms */ #define F2FS_ENCRYPTION_MODE_INVALID 0 @@ -1601,7 +1604,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); -int update_dent_inode(struct inode *, const struct qstr *); +int update_dent_inode(struct inode *, struct inode *, const struct qstr *); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f35be662..d5646a77 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -105,7 +105,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - if (update_dent_inode(inode, &dentry->d_name)) { + if (update_dent_inode(inode, inode, &dentry->d_name)) { dput(dentry); return 0; } @@ -122,6 +122,8 @@ static inline bool need_do_checkpoint(struct inode *inode) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; + else if (file_enc_name(inode) && need_dentry_mark(sbi, inode->i_ino)) + need_cp = true; else if (file_wrong_pino(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 9f97176a..7c8ed1f5 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -520,7 +520,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto put_out_dir; - if (update_dent_inode(old_inode, &new_dentry->d_name)) { + if (update_dent_inode(old_inode, new_inode, + &new_dentry->d_name)) { release_orphan_inode(sbi); goto put_out_dir; } @@ -560,6 +561,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, down_write(&F2FS_I(old_inode)->i_sem); file_lost_pino(old_inode); + if (new_inode && file_enc_name(new_inode)) + file_set_enc_name(old_inode); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = CURRENT_TIME; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 026ba6d3..cb400eef 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -83,6 +83,11 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out; } + if (file_enc_name(inode)) { + iput(dir); + return 0; + } + name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; @@ -143,6 +148,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) static void recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); + char *name; inode->i_mode = le16_to_cpu(raw->i_mode); i_size_write(inode, le64_to_cpu(raw->i_size)); @@ -153,8 +159,13 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); + if (file_enc_name(inode)) + name = ""; + else + name = F2FS_INODE(page)->i_name; + f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", - ino_of_node(page), F2FS_INODE(page)->i_name); + ino_of_node(page), name); } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) From f705b2c933a2711bb0a0e4340c68b9ecae00ac31 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Apr 2015 15:10:53 -0700 Subject: [PATCH 276/321] f2fs crypto: add symlink encryption This patch implements encryption support for symlink. Signed-off-by: Uday Savagaonkar Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim Conflicts: fs/f2fs/namei.c --- fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 5 +- fs/f2fs/namei.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 150 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index abc1334c..6b601f6d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1919,6 +1919,7 @@ extern const struct address_space_operations f2fs_node_aops; extern const struct address_space_operations f2fs_meta_aops; extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_encrypted_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; extern struct kmem_cache *inode_entry_slab; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 107bd6a7..30d2db9f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -197,7 +197,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &f2fs_symlink_inode_operations; + if (f2fs_encrypted_inode(inode)) + inode->i_op = &f2fs_encrypted_symlink_inode_operations; + else + inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7c8ed1f5..edda7ae4 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -341,16 +341,26 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; - size_t symlen = strlen(symname) + 1; + size_t len = strlen(symname); + size_t p_len; + char *p_str; + struct f2fs_str disk_link = FSTR_INIT(NULL, 0); + struct f2fs_encrypted_symlink_data *sd = NULL; int err; + if (len > dir->i_sb->s_blocksize) + return -ENAMETOOLONG; + f2fs_balance_fs(sbi); inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &f2fs_symlink_inode_operations; + if (f2fs_encrypted_inode(inode)) + inode->i_op = &f2fs_encrypted_symlink_inode_operations; + else + inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; f2fs_lock_op(sbi); @@ -358,10 +368,50 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (err) goto out; f2fs_unlock_op(sbi); - - err = page_symlink(inode, symname, symlen); alloc_nid_done(sbi, inode->i_ino); + if (f2fs_encrypted_inode(dir)) { + struct qstr istr = QSTR_INIT(symname, len); + + err = f2fs_inherit_context(dir, inode, NULL); + if (err) + goto err_out; + + err = f2fs_setup_fname_crypto(inode); + if (err) + goto err_out; + + err = f2fs_fname_crypto_alloc_buffer(inode, len, &disk_link); + if (err) + goto err_out; + + err = f2fs_fname_usr_to_disk(inode, &istr, &disk_link); + if (err < 0) + goto err_out; + + p_len = encrypted_symlink_data_len(disk_link.len) + 1; + + if (p_len > dir->i_sb->s_blocksize) { + err = -ENAMETOOLONG; + goto err_out; + } + + sd = kzalloc(p_len, GFP_NOFS); + if (!sd) { + err = -ENOMEM; + goto err_out; + } + memcpy(sd->encrypted_path, disk_link.name, disk_link.len); + sd->len = cpu_to_le16(disk_link.len); + p_str = (char *)sd; + } else { + p_len = len + 1; + p_str = (char *)symname; + } + + err = page_symlink(inode, p_str, p_len); + +err_out: d_instantiate(dentry, inode); unlock_new_inode(inode); @@ -374,10 +424,14 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, * If the symlink path is stored into inline_data, there is no * performance regression. */ - filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1); + if (!err) + filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + kfree(sd); + f2fs_fname_crypto_free_buffer(&disk_link); return err; out: handle_failed_inode(inode); @@ -606,6 +660,92 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, return err; } +#ifdef CONFIG_F2FS_FS_ENCRYPTION +static void *f2fs_encrypted_follow_link(struct dentry *dentry, + struct nameidata *nd) +{ + struct page *cpage = NULL; + char *caddr, *paddr = NULL; + struct f2fs_str cstr; + struct f2fs_str pstr = FSTR_INIT(NULL, 0); + struct inode *inode = dentry->d_inode; + struct f2fs_encrypted_symlink_data *sd; + loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); + u32 max_size = inode->i_sb->s_blocksize; + int res; + + res = f2fs_setup_fname_crypto(inode); + if (res) + return ERR_PTR(res); + + cpage = read_mapping_page(inode->i_mapping, 0, NULL); + if (IS_ERR(cpage)) + return cpage; + caddr = kmap(cpage); + caddr[size] = 0; + + /* Symlink is encrypted */ + sd = (struct f2fs_encrypted_symlink_data *)caddr; + cstr.name = sd->encrypted_path; + cstr.len = le16_to_cpu(sd->len); + + /* this is broken symlink case */ + if (cstr.name[0] == 0 && cstr.len == 0) { + res = -ENOENT; + goto errout; + } + + if ((cstr.len + sizeof(struct f2fs_encrypted_symlink_data) - 1) > + max_size) { + /* Symlink data on the disk is corrupted */ + res = -EIO; + goto errout; + } + res = f2fs_fname_crypto_alloc_buffer(inode, cstr.len, &pstr); + if (res) + goto errout; + + res = f2fs_fname_disk_to_usr(inode, NULL, &cstr, &pstr); + if (res < 0) + goto errout; + + paddr = pstr.name; + + /* Null-terminate the name */ + paddr[res] = '\0'; + nd_set_link(nd, paddr); + + kunmap(cpage); + page_cache_release(cpage); + return NULL; +errout: + f2fs_fname_crypto_free_buffer(&pstr); + kunmap(cpage); + page_cache_release(cpage); + return ERR_PTR(res); +} + +void kfree_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} + +const struct inode_operations f2fs_encrypted_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = f2fs_encrypted_follow_link, + .put_link = kfree_put_link, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +}; +#endif + const struct inode_operations f2fs_dir_inode_operations = { .create = f2fs_create, .lookup = f2fs_lookup, From 8ec728759b84acc92e4f4a96158f849055f4dd44 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 5 May 2015 20:20:29 -0700 Subject: [PATCH 277/321] f2fs crypto: fix missing key when reading a page 1. mount $mnt 2. cp data $mnt/ 3. umount $mnt 4. log out 5. log in 6. cat $mnt/data -> panic, due to no i_crypt_info. Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index c910fa72..c3f02b61 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -131,7 +131,9 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) unsigned long flags; struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - BUG_ON(ci == NULL); + if (ci == NULL) + return ERR_PTR(-EACCES); + /* * We first try getting the ctx from a free list because in * the common case the ctx will have an allocated and From 130e1d04c780701413ce9f486008b03e2eaca0e2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 6 May 2015 18:23:21 -0700 Subject: [PATCH 278/321] f2fs crypto: remove checking key context during lookup No matter what the key is valid or not, readdir shows the dir entries correctly. So, lookup should not failed. But, we expect further accesses should be denied from open, rename, link, and so on. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index edda7ae4..2c805cd1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -261,16 +261,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return ERR_CAST(inode); - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode) && - !f2fs_is_child_context_consistent_with_parent(dir, inode)) { - iput(inode); - f2fs_msg(inode->i_sb, KERN_WARNING, - "Inconsistent encryption contexts: %lu/%lu\n", - (unsigned long)dir->i_ino, - (unsigned long)inode->i_ino); - return ERR_PTR(-EPERM); - } - if (f2fs_has_inline_dots(inode)) { err = __recover_dot_dentries(inode, dir->i_ino); if (err) From afa6718bcb61a85d6d7e855fc8c64da1961c9798 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 7 May 2015 18:11:37 +0800 Subject: [PATCH 279/321] f2fs: add default mount options to remount I use f2fs filesystem with /data partition on my Android phone by the default mount options. When I remount /data in order to adding discard option to run some benchmarks, I find the default options such as background_gc, user_xattr and acl turned off. So I introduce a function named default_options in super.c. It do some default setting, and both mount and remount operations will call this function to complete default setting. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d61f74ab..d5dfd143 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -661,6 +661,22 @@ static const struct file_operations f2fs_seq_segment_info_fops = { .release = single_release, }; +static void default_options(struct f2fs_sb_info *sbi) +{ + /* init some FS parameters */ + sbi->active_logs = NR_CURSEG_TYPE; + + set_opt(sbi, BG_GC); + set_opt(sbi, INLINE_DATA); + +#ifdef CONFIG_F2FS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif +} + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -679,7 +695,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) active_logs = sbi->active_logs; sbi->mount_opt.opt = 0; - sbi->active_logs = NR_CURSEG_TYPE; + default_options(sbi); /* parse mount options */ err = parse_options(sb, data); @@ -1021,18 +1037,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; sb->s_fs_info = sbi; - /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; - - set_opt(sbi, BG_GC); - set_opt(sbi, INLINE_DATA); - -#ifdef CONFIG_F2FS_FS_XATTR - set_opt(sbi, XATTR_USER); -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - set_opt(sbi, POSIX_ACL); -#endif + default_options(sbi); /* parse mount options */ options = kstrdup((const char *)data, GFP_KERNEL); if (data && !options) { From 2b7a97cd3e2dbcead94bb46dd12abe60f54614fb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 May 2015 20:03:49 -0700 Subject: [PATCH 280/321] f2fs: do not issue next dnode discard redundantly We have a discard map, so that we can avoid redundant discard issues. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 08e91d8d..fb784d24 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -545,7 +545,20 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) { - if (f2fs_issue_discard(sbi, blkaddr, 1)) { + int err = -ENOTSUPP; + + if (test_opt(sbi, DISCARD)) { + struct seg_entry *se = get_seg_entry(sbi, + GET_SEGNO(sbi, blkaddr)); + unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + if (f2fs_test_bit(offset, se->discard_map)) + return; + + err = f2fs_issue_discard(sbi, blkaddr, 1); + } + + if (err) { struct page *page = grab_meta_page(sbi, blkaddr); /* zero-filled page */ set_page_dirty(page); From 260b74b4efc7ce96fa2fa277d5f2f057c0ca291e Mon Sep 17 00:00:00 2001 From: hujianyang Date: Tue, 12 May 2015 16:05:57 +0800 Subject: [PATCH 281/321] f2fs: add compat_ioctl to provide backward compatability introduce compat_ioctl to regular files, but doesn't add this functionality to f2fs_dir_operations. While running a 32-bit busybox, I met an error like this: (A is a directory) chattr: reading flags on A: Inappropriate ioctl for device This patch copies compat_ioctl from f2fs_file_operations and fix this problem. Signed-off-by: hujianyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 5a1f1f8b..354f4994 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -900,4 +900,7 @@ const struct file_operations f2fs_dir_operations = { .readdir = f2fs_readdir, .fsync = f2fs_sync_file, .unlocked_ioctl = f2fs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = f2fs_compat_ioctl, +#endif }; From cfbc7511830ad8cd7c7abb8cfd0eeb438b0dbfb9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 18 May 2015 11:45:15 -0700 Subject: [PATCH 282/321] f2fs: avoid buggy functions This patch avoids to use a buggy function for now. It needs to fix them later. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fb784d24..2c28fbc2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -134,6 +134,14 @@ static inline unsigned long __reverse_ffs(unsigned long word) static unsigned long __find_rev_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { + while (!f2fs_test_bit(offset, (unsigned char *)addr)) + offset++; + + if (offset > size) + offset = size; + + return offset; +#if 0 const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; @@ -180,11 +188,20 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr, return result + size; /* Nope. */ found_middle: return result + __reverse_ffs(tmp); +#endif } static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { + while (f2fs_test_bit(offset, (unsigned char *)addr)) + offset++; + + if (offset > size) + offset = size; + + return offset; +#if 0 const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; @@ -232,6 +249,7 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, return result + size; /* Nope. */ found_middle: return result + __reverse_ffz(tmp); +#endif } void register_inmem_page(struct inode *inode, struct page *page) From 8131c8000e0aac284fa791efc0115ae0a3ed5d19 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2015 22:49:58 +0200 Subject: [PATCH 283/321] f2fs: fix building on 32-bit architectures A bug fix to the debug output extended the type of some local variables to 64-bit, which now causes the kernel to fail building because of missing 64-bit division functions: ERROR: "__aeabi_uldivmod" [fs/f2fs/f2fs.ko] undefined! In the kernel, we have to use div_u64 or do_div to do this, in order to annotate that this is an expensive operation. As the function is only called for debug out, we know this is not performance critical, so it is safe to use div_u64. Signed-off-by: Arnd Bergmann Fixes: d1f85bd38db19 ("f2fs: avoid value overflow in showing current status") Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index efbc83f0..75176e0d 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -113,10 +113,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi) ndirty++; } } - dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; - si->bimodal = bimodal / dist; + dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); + si->bimodal = div_u64(bimodal, dist); if (si->dirty_count) - si->avg_vblocks = total_vblocks / ndirty; + si->avg_vblocks = div_u64(total_vblocks, ndirty); else si->avg_vblocks = 0; } From b58f1059e75a6229a08a23ca9a161445c876c329 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 May 2015 11:52:28 +0300 Subject: [PATCH 284/321] f2fs: cleanup a confusing indent The return was not indented far enough so it looked like it was supposed to go with the other if statement. Signed-off-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2c28fbc2..4b63ea7e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -628,8 +628,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (!force) { if (!test_opt(sbi, DISCARD) || !se->valid_blocks || - SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) - return; + SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) + return; } /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ From 00a4eda8749306a5bce9d50797e708d09102268f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 May 2015 14:35:14 -0700 Subject: [PATCH 285/321] f2fs: truncate data blocks for orphan inode As Hu reported, F2FS has a space leak problem, when conducting: 1) format a 4GB f2fs partition 2) dd a 3G file, 3) unlink it. So, when doing f2fs_drop_inode(), we need to truncate data blocks before skipping it. We can also drop unused caches assigned to each inode. Reported-by: hujianyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d5dfd143..bfc900e8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -431,8 +431,30 @@ static int f2fs_drop_inode(struct inode *inode) * - f2fs_gc -> iput -> evict * - inode_wait_for_writeback(inode) */ - if (!inode_unhashed(inode) && inode->i_state & I_SYNC) + if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { + if (!inode->i_nlink && !is_bad_inode(inode)) { + spin_unlock(&inode->i_lock); + + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode)) + commit_inmem_pages(inode, true); + + sb_start_intwrite(inode->i_sb); + i_size_write(inode, 0); + + if (F2FS_HAS_BLOCKS(inode)) + f2fs_truncate(inode); + + sb_end_intwrite(inode->i_sb); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_I(inode)->i_crypt_info) + f2fs_free_encryption_info(inode); +#endif + spin_lock(&inode->i_lock); + } return 0; + } return generic_drop_inode(inode); } From ba79a062e21669e9b7fcdd645174b57e564d2cd2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 May 2015 13:26:54 -0700 Subject: [PATCH 286/321] f2fs crypto: use slab caches This patch integrates the below patch into f2fs. "ext4 crypto: use slab caches Use slab caches the ext4_crypto_ctx and ext4_crypt_info structures for slighly better memory efficiency and debuggability." Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 58 ++++++++++++++++++++++---------------------- fs/f2fs/crypto_key.c | 6 ++--- fs/f2fs/f2fs.h | 1 + 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index c3f02b61..4057c072 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -66,6 +66,9 @@ static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock); struct workqueue_struct *f2fs_read_workqueue; static DEFINE_MUTEX(crypto_init); +static struct kmem_cache *f2fs_crypto_ctx_cachep; +struct kmem_cache *f2fs_crypt_info_cachep; + /** * f2fs_release_crypto_ctx() - Releases an encryption context * @ctx: The encryption context to release. @@ -90,7 +93,7 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { if (ctx->tfm) crypto_free_tfm(ctx->tfm); - kfree(ctx); + kmem_cache_free(f2fs_crypto_ctx_cachep, ctx); } else { spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); @@ -98,23 +101,6 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) } } -/** - * f2fs_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context - * @mask: The allocation mask. - * - * Return: An allocated and initialized encryption context on success. An error - * value or NULL otherwise. - */ -static struct f2fs_crypto_ctx *f2fs_alloc_and_init_crypto_ctx(gfp_t mask) -{ - struct f2fs_crypto_ctx *ctx = kzalloc(sizeof(struct f2fs_crypto_ctx), - mask); - - if (!ctx) - return ERR_PTR(-ENOMEM); - return ctx; -} - /** * f2fs_get_crypto_ctx() - Gets an encryption context * @inode: The inode for which we are doing the crypto @@ -151,9 +137,9 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) list_del(&ctx->free_list); spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); if (!ctx) { - ctx = f2fs_alloc_and_init_crypto_ctx(GFP_NOFS); - if (IS_ERR(ctx)) { - res = PTR_ERR(ctx); + ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS); + if (!ctx) { + res = -ENOMEM; goto out; } ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; @@ -263,7 +249,7 @@ void f2fs_exit_crypto(void) } if (pos->tfm) crypto_free_tfm(pos->tfm); - kfree(pos); + kmem_cache_free(f2fs_crypto_ctx_cachep, pos); } INIT_LIST_HEAD(&f2fs_free_crypto_ctxs); if (f2fs_bounce_page_pool) @@ -272,6 +258,12 @@ void f2fs_exit_crypto(void) if (f2fs_read_workqueue) destroy_workqueue(f2fs_read_workqueue); f2fs_read_workqueue = NULL; + if (f2fs_crypto_ctx_cachep) + kmem_cache_destroy(f2fs_crypto_ctx_cachep); + f2fs_crypto_ctx_cachep = NULL; + if (f2fs_crypt_info_cachep) + kmem_cache_destroy(f2fs_crypt_info_cachep); + f2fs_crypt_info_cachep = NULL; } /** @@ -284,24 +276,32 @@ void f2fs_exit_crypto(void) */ int f2fs_init_crypto(void) { - int i, res; + int i, res = -ENOMEM; mutex_lock(&crypto_init); if (f2fs_read_workqueue) goto already_initialized; f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); - if (!f2fs_read_workqueue) { - res = -ENOMEM; + if (!f2fs_read_workqueue) + goto fail; + + f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypto_ctx_cachep) + goto fail; + + f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypt_info_cachep) goto fail; - } for (i = 0; i < num_prealloc_crypto_ctxs; i++) { struct f2fs_crypto_ctx *ctx; - ctx = f2fs_alloc_and_init_crypto_ctx(GFP_KERNEL); - if (IS_ERR(ctx)) { - res = PTR_ERR(ctx); + ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL); + if (!ctx) { + res = -ENOMEM; goto fail; } list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index c7d414dd..6b9312b2 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -99,7 +99,7 @@ void f2fs_free_encryption_info(struct inode *inode) key_put(ci->ci_keyring_key); crypto_free_ablkcipher(ci->ci_ctfm); memzero_explicit(&ci->ci_raw, sizeof(ci->ci_raw)); - kfree(ci); + kmem_cache_free(f2fs_crypt_info_cachep, ci); fi->i_crypt_info = NULL; } @@ -137,7 +137,7 @@ int _f2fs_get_encryption_info(struct inode *inode) return -EINVAL; res = 0; - crypt_info = kmalloc(sizeof(struct f2fs_crypt_info), GFP_NOFS); + crypt_info = kmem_cache_alloc(f2fs_crypt_info_cachep, GFP_NOFS); if (!crypt_info) return -ENOMEM; @@ -187,7 +187,7 @@ int _f2fs_get_encryption_info(struct inode *inode) if (res < 0) { if (res == -ENOKEY) res = 0; - kfree(crypt_info); + kmem_cache_free(f2fs_crypt_info_cachep, crypt_info); } else { fi->i_crypt_info = crypt_info; crypt_info->ci_keyring_key = keyring_key; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6b601f6d..377f84a5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2002,6 +2002,7 @@ int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *); int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); /* crypt.c */ +extern struct kmem_cache *f2fs_crypt_info_cachep; extern struct workqueue_struct *f2fs_read_workqueue; bool f2fs_valid_contents_enc_mode(uint32_t); uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t); From 8c346e3f852203d0f1a6117484f4db4eda630bee Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 May 2015 13:33:00 -0700 Subject: [PATCH 287/321] f2fs crypto: get rid of ci_mode from struct f2fs_crypt_info This patch integrates the below patch into f2fs. "ext4 crypto: get rid of ci_mode from struct ext4_crypt_info The ci_mode field was superfluous, and getting rid of it gets rid of an unused hole in the structure." Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 11 +++++------ fs/f2fs/crypto_fname.c | 4 ++-- fs/f2fs/crypto_key.c | 12 ++++++------ fs/f2fs/f2fs_crypto.h | 1 - 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 4057c072..e4058b92 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -151,14 +151,13 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) * Allocate a new Crypto API context if we don't already have * one or if it isn't the right mode. */ - BUG_ON(ci->ci_mode == F2FS_ENCRYPTION_MODE_INVALID); - if (ctx->tfm && (ctx->mode != ci->ci_mode)) { + if (ctx->tfm && (ctx->mode != ci->ci_data_mode)) { crypto_free_tfm(ctx->tfm); ctx->tfm = NULL; ctx->mode = F2FS_ENCRYPTION_MODE_INVALID; } if (!ctx->tfm) { - switch (ci->ci_mode) { + switch (ci->ci_data_mode) { case F2FS_ENCRYPTION_MODE_AES_256_XTS: ctx->tfm = crypto_ablkcipher_tfm( crypto_alloc_ablkcipher("xts(aes)", 0, 0)); @@ -178,9 +177,9 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) ctx->tfm = NULL; goto out; } - ctx->mode = ci->ci_mode; + ctx->mode = ci->ci_data_mode; } - BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_mode)); + BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_data_mode)); /* * There shouldn't be a bounce page attached to the crypto @@ -388,7 +387,7 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, int res = 0; BUG_ON(!ctx->tfm); - BUG_ON(ctx->mode != fi->i_crypt_info->ci_mode); + BUG_ON(ctx->mode != fi->i_crypt_info->ci_data_mode); if (ctx->mode != F2FS_ENCRYPTION_MODE_AES_256_XTS) { printk_ratelimited(KERN_ERR diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index 8f3ff9b3..e3a1bdc7 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -269,9 +269,9 @@ int f2fs_setup_fname_crypto(struct inode *inode) if (!ci || ci->ci_ctfm) return 0; - if (ci->ci_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) { + if (ci->ci_filename_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) { printk_once(KERN_WARNING "f2fs: unsupported key mode %d\n", - ci->ci_mode); + ci->ci_filename_mode); return -ENOKEY; } diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 6b9312b2..5ae9cc8f 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -148,14 +148,14 @@ int _f2fs_get_encryption_info(struct inode *inode) memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); if (S_ISREG(inode->i_mode)) - crypt_info->ci_mode = ctx.contents_encryption_mode; + crypt_info->ci_size = + f2fs_encryption_key_size(crypt_info->ci_data_mode); else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - crypt_info->ci_mode = ctx.filenames_encryption_mode; - else { - printk(KERN_ERR "f2fs crypto: Unsupported inode type.\n"); + crypt_info->ci_size = + f2fs_encryption_key_size(crypt_info->ci_filename_mode); + else BUG(); - } - crypt_info->ci_size = f2fs_encryption_key_size(crypt_info->ci_mode); + BUG_ON(!crypt_info->ci_size); memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX, diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index 6e413949..58682d28 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -75,7 +75,6 @@ struct f2fs_encryption_key { } __attribute__((__packed__)); struct f2fs_crypt_info { - unsigned char ci_mode; unsigned char ci_size; char ci_data_mode; char ci_filename_mode; From a1894a920c522520b22f7f27b20ffe9ee7a1bed0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 May 2015 13:40:20 -0700 Subject: [PATCH 288/321] f2fs crypto: shrink size of the f2fs_crypto_ctx structure This patch integrates the below patch into f2fs. "ext4 crypto: shrink size of the ext4_crypto_ctx structure Some fields are only used when the crypto_ctx is being used on the read path, some are only used on the write path, and some are only used when the structure is on free list. Optimize memory use by using a union." Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 40 ++++++++++++++++++---------------------- fs/f2fs/f2fs_crypto.h | 21 ++++++++++++++------- fs/f2fs/segment.c | 2 +- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index e4058b92..6d38da11 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -82,14 +82,14 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) { unsigned long flags; - if (ctx->bounce_page) { + if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) { if (ctx->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(ctx->bounce_page); + __free_page(ctx->w.bounce_page); else - mempool_free(ctx->bounce_page, f2fs_bounce_page_pool); - ctx->bounce_page = NULL; + mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool); + ctx->w.bounce_page = NULL; } - ctx->control_page = NULL; + ctx->w.control_page = NULL; if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { if (ctx->tfm) crypto_free_tfm(ctx->tfm); @@ -146,6 +146,7 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) } else { ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } + ctx->flags &= ~F2FS_WRITE_PATH_FL; /* * Allocate a new Crypto API context if we don't already have @@ -181,12 +182,6 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) } BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_data_mode)); - /* - * There shouldn't be a bounce page attached to the crypto - * context at this point. - */ - BUG_ON(ctx->bounce_page); - out: if (res) { if (!IS_ERR_OR_NULL(ctx)) @@ -203,8 +198,8 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) static void completion_pages(struct work_struct *work) { struct f2fs_crypto_ctx *ctx = - container_of(work, struct f2fs_crypto_ctx, work); - struct bio *bio = ctx->bio; + container_of(work, struct f2fs_crypto_ctx, r.work); + struct bio *bio = ctx->r.bio; struct bio_vec *bv; int i; @@ -225,9 +220,9 @@ static void completion_pages(struct work_struct *work) void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio) { - INIT_WORK(&ctx->work, completion_pages); - ctx->bio = bio; - queue_work(f2fs_read_workqueue, &ctx->work); + INIT_WORK(&ctx->r.work, completion_pages); + ctx->r.bio = bio; + queue_work(f2fs_read_workqueue, &ctx->r.work); } /** @@ -238,12 +233,12 @@ void f2fs_exit_crypto(void) struct f2fs_crypto_ctx *pos, *n; list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) { - if (pos->bounce_page) { + if (pos->w.bounce_page) { if (pos->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(pos->bounce_page); + __free_page(pos->w.bounce_page); else - mempool_free(pos->bounce_page, + mempool_free(pos->w.bounce_page, f2fs_bounce_page_pool); } if (pos->tfm) @@ -335,7 +330,7 @@ void f2fs_restore_and_release_control_page(struct page **page) ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page); /* restore control page */ - *page = ctx->control_page; + *page = ctx->w.control_page; f2fs_restore_control_page(bounce_page); } @@ -492,8 +487,9 @@ struct page *f2fs_encrypt(struct inode *inode, } else { ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; } - ctx->bounce_page = ciphertext_page; - ctx->control_page = plaintext_page; + ctx->flags |= F2FS_WRITE_PATH_FL; + ctx->w.bounce_page = ciphertext_page; + ctx->w.control_page = plaintext_page; err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, plaintext_page, ciphertext_page); if (err) { diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index 58682d28..e33cec9c 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -87,16 +87,23 @@ struct f2fs_crypt_info { #define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 #define F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002 +#define F2FS_WRITE_PATH_FL 0x00000004 struct f2fs_crypto_ctx { struct crypto_tfm *tfm; /* Crypto API context */ - struct page *bounce_page; /* Ciphertext page on write path */ - struct page *control_page; /* Original page on write path */ - struct bio *bio; /* The bio for this context */ - struct work_struct work; /* Work queue for read complete path */ - struct list_head free_list; /* Free list */ - int flags; /* Flags */ - int mode; /* Encryption mode for tfm */ + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + char flags; /* Flags */ + char mode; /* Encryption mode for tfm */ }; struct f2fs_completion_result { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4b63ea7e..4553d663 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1441,7 +1441,7 @@ static inline bool is_merged_page(struct f2fs_sb_info *sbi, /* encrypted page */ ctx = (struct f2fs_crypto_ctx *)page_private( bvec->bv_page); - target = ctx->control_page; + target = ctx->w.control_page; } if (page == target) { From deb64ae8a8b2441eb8faf8a0f6cba09ff6e0ed06 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 13 May 2015 18:20:54 +0800 Subject: [PATCH 289/321] f2fs crypto: fix to release buffer for fname crypto This patch fixes memory leak issue in error path of f2fs_fname_setup_filename(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto_fname.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index e3a1bdc7..016c4b63 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -425,7 +425,7 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) { fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; - goto out; + return 0; } ret = f2fs_setup_fname_crypto(dir); if (ret) @@ -435,14 +435,13 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len, &fname->crypto_buf); if (ret < 0) - goto out; + return ret; ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf); if (ret < 0) goto out; fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; - ret = 0; - goto out; + return 0; } if (!lookup) { ret = -EACCES; @@ -476,8 +475,9 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; } - ret = 0; + return 0; out: + f2fs_fname_crypto_free_buffer(&fname->crypto_buf); return ret; } From 2c73184579c1c199ea2bd2184026fdd432b3c088 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 15 May 2015 11:14:34 +0800 Subject: [PATCH 290/321] f2fs crypto: fix incorrect release for crypto ctx When encryption feature is enable, if we rmmod f2fs module, we will encounter a stack backtrace reported in syslog: "BUG: Bad page state in process rmmod pfn:aaf8a page:f0f4f148 count:0 mapcount:129 mapping:ee2f4104 index:0x80 flags: 0xee2830a4(referenced|lru|slab|private_2|writeback|swapbacked|mlocked) page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: flags: 0x2030a0(lru|slab|private_2|writeback|mlocked) Modules linked in: f2fs(O-) fuse bnep rfcomm bluetooth dm_crypt binfmt_misc snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device joydev ppdev mac_hid lp hid_generic i2c_piix4 parport_pc psmouse snd serio_raw parport soundcore ext4 jbd2 mbcache usbhid hid e1000 [last unloaded: f2fs] CPU: 1 PID: 3049 Comm: rmmod Tainted: G B O 4.1.0-rc3+ #10 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 00000000 00000000 c0021eb4 c15b7518 f0f4f148 c0021ed8 c112e0b7 c1779174 c9b75674 000aaf8a 01b13ce1 c17791a4 f0f4f148 ee2830a4 c0021ef8 c112e3c3 00000000 f0f4f148 c0021f34 f0f4f148 ee2830a4 ef9f0000 c0021f20 c112fdf8 Call Trace: [] dump_stack+0x41/0x52 [] bad_page.part.72+0xa7/0x100 [] free_pages_prepare+0x213/0x220 [] free_hot_cold_page+0x28/0x120 [] ? try_to_wake_up+0x2b0/0x2b0 [] __free_pages+0x25/0x30 [] mempool_free_pages+0xd/0x10 [] mempool_free+0x31/0x90 [] f2fs_exit_crypto+0x6f/0xf0 [f2fs] [] exit_f2fs_fs+0x23/0x95f [f2fs] [] SyS_delete_module+0x130/0x180 [] ? vm_munmap+0x46/0x60 [] sysenter_do_call+0x12/0x12" The reason is that: since commit 0827e645fd35 ("f2fs crypto: shrink size of the f2fs_crypto_ctx structure") is merged, some fields in f2fs_crypto_ctx structure are merged into a union as they will never be used simultaneously in write path, read path or on free list. In f2fs_exit_crypto, we traverse each crypto ctx from free list, in this moment, our free_list field in union is valid, but still we will try to release memory space which is pointed by other invalid field in union structure for each ctx. Then the error occurs, let's fix it with this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 6d38da11..7f1ee9e8 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -233,14 +233,6 @@ void f2fs_exit_crypto(void) struct f2fs_crypto_ctx *pos, *n; list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) { - if (pos->w.bounce_page) { - if (pos->flags & - F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(pos->w.bounce_page); - else - mempool_free(pos->w.bounce_page, - f2fs_bounce_page_pool); - } if (pos->tfm) crypto_free_tfm(pos->tfm); kmem_cache_free(f2fs_crypto_ctx_cachep, pos); From b24fdfafde87a9416c3fc2b61491e6ac78051185 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 May 2015 15:37:24 -0700 Subject: [PATCH 291/321] f2fs crypto: split f2fs_crypto_init/exit with two parts This patch splits f2fs_crypto_init/exit with two parts: base initialization and memory allocation. Firstly, f2fs module declares the base encryption memory pointers. Then, allocating internal memories is done at the first encrypted inode access. Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 90 +++++++++++++++++++++++++------------------- fs/f2fs/crypto_key.c | 8 ++-- fs/f2fs/f2fs.h | 6 +-- fs/f2fs/super.c | 8 +++- 4 files changed, 65 insertions(+), 47 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 7f1ee9e8..c6d11229 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -63,7 +63,7 @@ static mempool_t *f2fs_bounce_page_pool; static LIST_HEAD(f2fs_free_crypto_ctxs); static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock); -struct workqueue_struct *f2fs_read_workqueue; +static struct workqueue_struct *f2fs_read_workqueue; static DEFINE_MUTEX(crypto_init); static struct kmem_cache *f2fs_crypto_ctx_cachep; @@ -225,10 +225,7 @@ void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio) queue_work(f2fs_read_workqueue, &ctx->r.work); } -/** - * f2fs_exit_crypto() - Shutdown the f2fs encryption system - */ -void f2fs_exit_crypto(void) +static void f2fs_crypto_destroy(void) { struct f2fs_crypto_ctx *pos, *n; @@ -241,73 +238,90 @@ void f2fs_exit_crypto(void) if (f2fs_bounce_page_pool) mempool_destroy(f2fs_bounce_page_pool); f2fs_bounce_page_pool = NULL; - if (f2fs_read_workqueue) - destroy_workqueue(f2fs_read_workqueue); - f2fs_read_workqueue = NULL; - if (f2fs_crypto_ctx_cachep) - kmem_cache_destroy(f2fs_crypto_ctx_cachep); - f2fs_crypto_ctx_cachep = NULL; - if (f2fs_crypt_info_cachep) - kmem_cache_destroy(f2fs_crypt_info_cachep); - f2fs_crypt_info_cachep = NULL; } /** - * f2fs_init_crypto() - Set up for f2fs encryption. + * f2fs_crypto_initialize() - Set up for f2fs encryption. * * We only call this when we start accessing encrypted files, since it * results in memory getting allocated that wouldn't otherwise be used. * * Return: Zero on success, non-zero otherwise. */ -int f2fs_init_crypto(void) +int f2fs_crypto_initialize(void) { int i, res = -ENOMEM; + if (f2fs_bounce_page_pool) + return 0; + mutex_lock(&crypto_init); - if (f2fs_read_workqueue) + if (f2fs_bounce_page_pool) goto already_initialized; - f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); - if (!f2fs_read_workqueue) - goto fail; - - f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx, - SLAB_RECLAIM_ACCOUNT); - if (!f2fs_crypto_ctx_cachep) - goto fail; - - f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info, - SLAB_RECLAIM_ACCOUNT); - if (!f2fs_crypt_info_cachep) - goto fail; - for (i = 0; i < num_prealloc_crypto_ctxs; i++) { struct f2fs_crypto_ctx *ctx; ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL); - if (!ctx) { - res = -ENOMEM; + if (!ctx) goto fail; - } list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); } + /* must be allocated at the last step to avoid race condition above */ f2fs_bounce_page_pool = mempool_create_page_pool(num_prealloc_crypto_pages, 0); - if (!f2fs_bounce_page_pool) { - res = -ENOMEM; + if (!f2fs_bounce_page_pool) goto fail; - } + already_initialized: mutex_unlock(&crypto_init); return 0; fail: - f2fs_exit_crypto(); + f2fs_crypto_destroy(); mutex_unlock(&crypto_init); return res; } +/** + * f2fs_exit_crypto() - Shutdown the f2fs encryption system + */ +void f2fs_exit_crypto(void) +{ + f2fs_crypto_destroy(); + + if (f2fs_read_workqueue) + destroy_workqueue(f2fs_read_workqueue); + if (f2fs_crypto_ctx_cachep) + kmem_cache_destroy(f2fs_crypto_ctx_cachep); + if (f2fs_crypt_info_cachep) + kmem_cache_destroy(f2fs_crypt_info_cachep); +} + +int __init f2fs_init_crypto(void) +{ + int res = -ENOMEM; + + f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); + if (!f2fs_read_workqueue) + goto fail; + + f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypto_ctx_cachep) + goto fail; + + f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypt_info_cachep) + goto fail; + + return 0; +fail: + f2fs_exit_crypto(); + return res; +} + void f2fs_restore_and_release_control_page(struct page **page) { struct f2fs_crypto_ctx *ctx; diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 5ae9cc8f..8a10569a 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -115,11 +115,9 @@ int _f2fs_get_encryption_info(struct inode *inode) struct user_key_payload *ukp; int res; - if (!f2fs_read_workqueue) { - res = f2fs_init_crypto(); - if (res) - return res; - } + res = f2fs_crypto_initialize(); + if (res) + return res; if (fi->i_crypt_info) { if (!fi->i_crypt_info->ci_keyring_key || diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 377f84a5..96ad9758 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2003,7 +2003,6 @@ int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); /* crypt.c */ extern struct kmem_cache *f2fs_crypt_info_cachep; -extern struct workqueue_struct *f2fs_read_workqueue; bool f2fs_valid_contents_enc_mode(uint32_t); uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t); struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *); @@ -2030,7 +2029,8 @@ int f2fs_fname_usr_to_disk(struct inode *, const struct qstr *, void f2fs_restore_and_release_control_page(struct page **); void f2fs_restore_control_page(struct page *); -int f2fs_init_crypto(void); +int __init f2fs_init_crypto(void); +int f2fs_crypto_initialize(void); void f2fs_exit_crypto(void); int f2fs_has_encryption_key(struct inode *); @@ -2057,7 +2057,7 @@ void f2fs_fname_free_filename(struct f2fs_filename *); static inline void f2fs_restore_and_release_control_page(struct page **p) { } static inline void f2fs_restore_control_page(struct page *p) { } -static inline int f2fs_init_crypto(void) { return 0; } +static inline int __init f2fs_init_crypto(void) { return 0; } static inline void f2fs_exit_crypto(void) { } static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bfc900e8..4f74fee2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1359,13 +1359,18 @@ static int __init init_f2fs_fs(void) err = -ENOMEM; goto free_extent_cache; } - err = register_filesystem(&f2fs_fs_type); + err = f2fs_init_crypto(); if (err) goto free_kset; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_crypto; f2fs_create_root_stats(); f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); return 0; +free_crypto: + f2fs_exit_crypto(); free_kset: kset_unregister(f2fs_kset); free_extent_cache: @@ -1387,6 +1392,7 @@ static void __exit exit_f2fs_fs(void) remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); + f2fs_exit_crypto(); destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); From 191808c58e7665a8b23409b7e3aa7b042c115636 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 18 May 2015 18:00:06 +0800 Subject: [PATCH 292/321] f2fs crypto: zero next free dnode block Now page cache of meta inode is used by garbage collection for encrypted page, it may contain random data, so we should zero it before issuing discard. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4553d663..052e592e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -578,7 +578,7 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) if (err) { struct page *page = grab_meta_page(sbi, blkaddr); - /* zero-filled page */ + memset(page_address(page), 0, F2FS_BLKSIZE); set_page_dirty(page); f2fs_put_page(page, 1); } From 258fb7b6207322baed814601790f7aac4a93a1bd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 19 May 2015 17:40:04 +0800 Subject: [PATCH 293/321] f2fs: introduce update_meta_page Add a help function update_meta_page() to update meta page with specified buffer. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 26 ++++++-------------------- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 25 +++++++++++++++---------- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a43c17f8..f529e896 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -889,10 +889,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; - struct page *cp_page; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; - void *kaddr; int i; int cp_payload_blks = __cp_payload(sbi); @@ -989,19 +987,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk = __start_cp_addr(sbi); /* write out checkpoint buffer at block 0 */ - cp_page = grab_meta_page(sbi, start_blk++); - kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, F2FS_BLKSIZE); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); - - for (i = 1; i < 1 + cp_payload_blks; i++) { - cp_page = grab_meta_page(sbi, start_blk++); - kaddr = page_address(cp_page); - memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); - } + update_meta_page(sbi, ckpt, start_blk++); + + for (i = 1; i < 1 + cp_payload_blks; i++) + update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, + start_blk++); if (orphan_num) { write_orphan_inodes(sbi, start_blk); @@ -1016,11 +1006,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* writeout checkpoint block */ - cp_page = grab_meta_page(sbi, start_blk); - kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, F2FS_BLKSIZE); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); + update_meta_page(sbi, ckpt, start_blk); /* wait for previous submitted node/meta pages writeback */ wait_on_all_pages_writeback(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 96ad9758..343a8f33 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1689,6 +1689,7 @@ int npages_for_summary_flush(struct f2fs_sb_info *, bool); void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +void update_meta_page(struct f2fs_sb_info *, void *, block_t); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(unsigned int, struct f2fs_io_info *); void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 052e592e..bcaf658c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -576,12 +576,8 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) err = f2fs_issue_discard(sbi, blkaddr, 1); } - if (err) { - struct page *page = grab_meta_page(sbi, blkaddr); - memset(page_address(page), 0, F2FS_BLKSIZE); - set_page_dirty(page); - f2fs_put_page(page, 1); - } + if (err) + update_meta_page(sbi, NULL, blkaddr); } static void __add_discard_entry(struct f2fs_sb_info *sbi, @@ -860,16 +856,25 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); } -static void write_sum_page(struct f2fs_sb_info *sbi, - struct f2fs_summary_block *sum_blk, block_t blk_addr) +void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { struct page *page = grab_meta_page(sbi, blk_addr); - void *kaddr = page_address(page); - memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); + void *dst = page_address(page); + + if (src) + memcpy(dst, src, PAGE_CACHE_SIZE); + else + memset(dst, 0, PAGE_CACHE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } +static void write_sum_page(struct f2fs_sb_info *sbi, + struct f2fs_summary_block *sum_blk, block_t blk_addr) +{ + update_meta_page(sbi, (void *)sum_blk, blk_addr); +} + static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); From 215a269d84891bfaa19be4c7443b67fbcac34782 Mon Sep 17 00:00:00 2001 From: hujianyang Date: Thu, 21 May 2015 14:42:53 +0800 Subject: [PATCH 294/321] f2fs: recovering broken superblock during mount This patch recovers a broken superblock with the other valid one. Signed-off-by: hujianyang [Jaegeuk Kim: reinitialize local variables in f2fs_fill_super for retrial] Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 65 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4f74fee2..d4cd04dc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -970,29 +970,36 @@ static void init_sb_info(struct f2fs_sb_info *sbi) */ static int read_raw_super_block(struct super_block *sb, struct f2fs_super_block **raw_super, - struct buffer_head **raw_super_buf) + struct buffer_head **raw_super_buf, + int *recovery) { int block = 0; + struct buffer_head *buffer; + struct f2fs_super_block *super; + int err = 0; retry: - *raw_super_buf = sb_bread(sb, block); - if (!*raw_super_buf) { + buffer = sb_bread(sb, block); + if (!buffer) { + *recovery = 1; f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", block + 1); if (block == 0) { block++; goto retry; } else { - return -EIO; + err = -EIO; + goto out; } } - *raw_super = (struct f2fs_super_block *) - ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); + super = (struct f2fs_super_block *) + ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET); /* sanity checking of raw super */ - if (sanity_check_raw_super(sb, *raw_super)) { - brelse(*raw_super_buf); + if (sanity_check_raw_super(sb, super)) { + brelse(buffer); + *recovery = 1; f2fs_msg(sb, KERN_ERR, "Can't find valid F2FS filesystem in %dth superblock", block + 1); @@ -1000,10 +1007,30 @@ static int read_raw_super_block(struct super_block *sb, block++; goto retry; } else { - return -EINVAL; + err = -EINVAL; + goto out; } } + if (!*raw_super) { + *raw_super_buf = buffer; + *raw_super = super; + } else { + /* already have a valid superblock */ + brelse(buffer); + } + + /* check the validity of the second superblock */ + if (block == 0) { + block++; + goto retry; + } + +out: + /* No valid superblock */ + if (!*raw_super) + return err; + return 0; } @@ -1034,15 +1061,20 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi) static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; - struct f2fs_super_block *raw_super = NULL; + struct f2fs_super_block *raw_super; struct buffer_head *raw_super_buf; struct inode *root; - long err = -EINVAL; + long err; bool retry = true, need_fsck = false; char *options = NULL; - int i; + int recovery, i; try_onemore: + err = -EINVAL; + raw_super = NULL; + raw_super_buf = NULL; + recovery = 0; + /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); if (!sbi) @@ -1054,7 +1086,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - err = read_raw_super_block(sb, &raw_super, &raw_super_buf); + err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery); if (err) goto free_sbi; @@ -1252,6 +1284,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_kobj; } kfree(options); + + /* recover broken superblock */ + if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) { + f2fs_msg(sb, KERN_INFO, "Recover invalid superblock"); + f2fs_commit_super(sbi); + } + return 0; free_kobj: From b063f1d837d0c5f4cf2dd8337bc7ff2747374384 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 May 2015 22:26:54 -0700 Subject: [PATCH 295/321] f2fs crypto: use per-inode tfm structure This patch applies the following ext4 patch: ext4 crypto: use per-inode tfm structure As suggested by Herbert Xu, we shouldn't allocate a new tfm each time we read or write a page. Instead we can use a single tfm hanging off the inode's crypt_info structure for all of our encryption needs for that inode, since the tfm can be used by multiple crypto requests in parallel. Also use cmpxchg() to avoid races that could result in crypt_info structure getting doubly allocated or doubly freed. Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 82 +++---------------------------- fs/f2fs/crypto_fname.c | 48 +----------------- fs/f2fs/crypto_key.c | 107 ++++++++++++++++++++++++++++++----------- fs/f2fs/dir.c | 8 +-- fs/f2fs/f2fs.h | 5 +- fs/f2fs/f2fs_crypto.h | 4 -- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 4 +- fs/f2fs/super.c | 3 +- 9 files changed, 96 insertions(+), 167 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index c6d11229..2c7819ae 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -91,8 +91,6 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) } ctx->w.control_page = NULL; if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { - if (ctx->tfm) - crypto_free_tfm(ctx->tfm); kmem_cache_free(f2fs_crypto_ctx_cachep, ctx); } else { spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); @@ -113,7 +111,6 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) { struct f2fs_crypto_ctx *ctx = NULL; - int res = 0; unsigned long flags; struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; @@ -138,56 +135,13 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); if (!ctx) { ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS); - if (!ctx) { - res = -ENOMEM; - goto out; - } + if (!ctx) + return ERR_PTR(-ENOMEM); ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } else { ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } ctx->flags &= ~F2FS_WRITE_PATH_FL; - - /* - * Allocate a new Crypto API context if we don't already have - * one or if it isn't the right mode. - */ - if (ctx->tfm && (ctx->mode != ci->ci_data_mode)) { - crypto_free_tfm(ctx->tfm); - ctx->tfm = NULL; - ctx->mode = F2FS_ENCRYPTION_MODE_INVALID; - } - if (!ctx->tfm) { - switch (ci->ci_data_mode) { - case F2FS_ENCRYPTION_MODE_AES_256_XTS: - ctx->tfm = crypto_ablkcipher_tfm( - crypto_alloc_ablkcipher("xts(aes)", 0, 0)); - break; - case F2FS_ENCRYPTION_MODE_AES_256_GCM: - /* - * TODO(mhalcrow): AEAD w/ gcm(aes); - * crypto_aead_setauthsize() - */ - ctx->tfm = ERR_PTR(-ENOTSUPP); - break; - default: - BUG(); - } - if (IS_ERR_OR_NULL(ctx->tfm)) { - res = PTR_ERR(ctx->tfm); - ctx->tfm = NULL; - goto out; - } - ctx->mode = ci->ci_data_mode; - } - BUG_ON(ci->ci_size != f2fs_encryption_key_size(ci->ci_data_mode)); - -out: - if (res) { - if (!IS_ERR_OR_NULL(ctx)) - f2fs_release_crypto_ctx(ctx); - ctx = ERR_PTR(res); - } return ctx; } @@ -229,11 +183,8 @@ static void f2fs_crypto_destroy(void) { struct f2fs_crypto_ctx *pos, *n; - list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) { - if (pos->tfm) - crypto_free_tfm(pos->tfm); + list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) kmem_cache_free(f2fs_crypto_ctx_cachep, pos); - } INIT_LIST_HEAD(&f2fs_free_crypto_ctxs); if (f2fs_bounce_page_pool) mempool_destroy(f2fs_bounce_page_pool); @@ -383,32 +334,11 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, struct ablkcipher_request *req = NULL; DECLARE_F2FS_COMPLETION_RESULT(ecr); struct scatterlist dst, src; - struct f2fs_inode_info *fi = F2FS_I(inode); - struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm); + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; int res = 0; - BUG_ON(!ctx->tfm); - BUG_ON(ctx->mode != fi->i_crypt_info->ci_data_mode); - - if (ctx->mode != F2FS_ENCRYPTION_MODE_AES_256_XTS) { - printk_ratelimited(KERN_ERR - "%s: unsupported crypto algorithm: %d\n", - __func__, ctx->mode); - return -ENOTSUPP; - } - - crypto_ablkcipher_clear_flags(atfm, ~0); - crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY); - - res = crypto_ablkcipher_setkey(atfm, fi->i_crypt_info->ci_raw, - fi->i_crypt_info->ci_size); - if (res) { - printk_ratelimited(KERN_ERR - "%s: crypto_ablkcipher_setkey() failed\n", - __func__); - return res; - } - req = ablkcipher_request_alloc(atfm, GFP_NOFS); + req = ablkcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited(KERN_ERR "%s: crypto_request_alloc() failed\n", diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index 016c4b63..81852cca 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -249,52 +249,6 @@ static int digest_decode(const char *src, int len, char *dst) return cp - dst; } -int f2fs_setup_fname_crypto(struct inode *inode) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_crypt_info *ci = fi->i_crypt_info; - struct crypto_ablkcipher *ctfm; - int res; - - /* Check if the crypto policy is set on the inode */ - res = f2fs_encrypted_inode(inode); - if (res == 0) - return 0; - - res = f2fs_get_encryption_info(inode); - if (res < 0) - return res; - ci = fi->i_crypt_info; - - if (!ci || ci->ci_ctfm) - return 0; - - if (ci->ci_filename_mode != F2FS_ENCRYPTION_MODE_AES_256_CTS) { - printk_once(KERN_WARNING "f2fs: unsupported key mode %d\n", - ci->ci_filename_mode); - return -ENOKEY; - } - - ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))", 0, 0); - if (!ctfm || IS_ERR(ctfm)) { - res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; - printk(KERN_DEBUG "%s: error (%d) allocating crypto tfm\n", - __func__, res); - return res; - } - crypto_ablkcipher_clear_flags(ctfm, ~0); - crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), - CRYPTO_TFM_REQ_WEAK_KEY); - - res = crypto_ablkcipher_setkey(ctfm, ci->ci_raw, ci->ci_size); - if (res) { - crypto_free_ablkcipher(ctfm); - return -EIO; - } - ci->ci_ctfm = ctfm; - return 0; -} - /** * f2fs_fname_crypto_round_up() - * @@ -427,7 +381,7 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, fname->disk_name.len = iname->len; return 0; } - ret = f2fs_setup_fname_crypto(dir); + ret = f2fs_get_encryption_info(dir); if (ret) return ret; ci = F2FS_I(dir)->i_crypt_info; diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 8a10569a..95b8f936 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -87,20 +87,31 @@ static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE], return res; } -void f2fs_free_encryption_info(struct inode *inode) +static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci) { - struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_crypt_info *ci = fi->i_crypt_info; - if (!ci) return; if (ci->ci_keyring_key) key_put(ci->ci_keyring_key); crypto_free_ablkcipher(ci->ci_ctfm); - memzero_explicit(&ci->ci_raw, sizeof(ci->ci_raw)); kmem_cache_free(f2fs_crypt_info_cachep, ci); - fi->i_crypt_info = NULL; +} + +void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *prev; + + if (ci == NULL) + ci = ACCESS_ONCE(fi->i_crypt_info); + if (ci == NULL) + return; + prev = cmpxchg(&fi->i_crypt_info, ci, NULL); + if (prev != ci) + return; + + f2fs_free_crypt_info(ci); } int _f2fs_get_encryption_info(struct inode *inode) @@ -113,17 +124,23 @@ int _f2fs_get_encryption_info(struct inode *inode) struct f2fs_encryption_key *master_key; struct f2fs_encryption_context ctx; struct user_key_payload *ukp; + struct crypto_ablkcipher *ctfm; + const char *cipher_str; + char raw_key[F2FS_MAX_KEY_SIZE]; + char mode; int res; res = f2fs_crypto_initialize(); if (res) return res; - - if (fi->i_crypt_info) { - if (!fi->i_crypt_info->ci_keyring_key || - key_validate(fi->i_crypt_info->ci_keyring_key) == 0) +retry: + crypt_info = ACCESS_ONCE(fi->i_crypt_info); + if (crypt_info) { + if (!crypt_info->ci_keyring_key || + key_validate(crypt_info->ci_keyring_key) == 0) return 0; - f2fs_free_encryption_info(inode); + f2fs_free_encryption_info(inode, crypt_info); + goto retry; } res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, @@ -143,18 +160,30 @@ int _f2fs_get_encryption_info(struct inode *inode) crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; + crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); if (S_ISREG(inode->i_mode)) - crypt_info->ci_size = - f2fs_encryption_key_size(crypt_info->ci_data_mode); + mode = crypt_info->ci_data_mode; else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - crypt_info->ci_size = - f2fs_encryption_key_size(crypt_info->ci_filename_mode); + mode = crypt_info->ci_filename_mode; else BUG(); - BUG_ON(!crypt_info->ci_size); + switch (mode) { + case F2FS_ENCRYPTION_MODE_AES_256_XTS: + cipher_str = "xts(aes)"; + break; + case F2FS_ENCRYPTION_MODE_AES_256_CTS: + cipher_str = "cts(cbc(aes))"; + break; + default: + printk_once(KERN_WARNING + "f2fs: unsupported key mode %d (ino %u)\n", + mode, (unsigned) inode->i_ino); + res = -ENOKEY; + goto out; + } memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX, F2FS_KEY_DESC_PREFIX_SIZE); @@ -169,6 +198,7 @@ int _f2fs_get_encryption_info(struct inode *inode) keyring_key = NULL; goto out; } + crypt_info->ci_keyring_key = keyring_key; BUG_ON(keyring_key->type != &key_type_logon); ukp = ((struct user_key_payload *)keyring_key->payload.data); if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { @@ -180,19 +210,40 @@ int _f2fs_get_encryption_info(struct inode *inode) F2FS_KEY_DERIVATION_NONCE_SIZE); BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE); res = f2fs_derive_key_aes(ctx.nonce, master_key->raw, - crypt_info->ci_raw); -out: - if (res < 0) { - if (res == -ENOKEY) - res = 0; - kmem_cache_free(f2fs_crypt_info_cachep, crypt_info); - } else { - fi->i_crypt_info = crypt_info; - crypt_info->ci_keyring_key = keyring_key; - keyring_key = NULL; + raw_key); + if (res) + goto out; + + ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; + printk(KERN_DEBUG + "%s: error %d (inode %u) allocating crypto tfm\n", + __func__, res, (unsigned) inode->i_ino); + goto out; } - if (keyring_key) - key_put(keyring_key); + crypt_info->ci_ctfm = ctfm; + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + res = crypto_ablkcipher_setkey(ctfm, raw_key, + f2fs_encryption_key_size(mode)); + if (res) + goto out; + + memzero_explicit(raw_key, sizeof(raw_key)); + if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) { + f2fs_free_crypt_info(crypt_info); + goto retry; + } + return 0; + +out: + if (res == -ENOKEY && !S_ISREG(inode->i_mode)) + res = 0; + + f2fs_free_crypt_info(crypt_info); + memzero_explicit(raw_key, sizeof(raw_key)); return res; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 354f4994..7db480de 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -842,11 +842,11 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) unsigned int n = 0; int err = 0; - err = f2fs_setup_fname_crypto(inode); - if (err) - return err; - if (f2fs_encrypted_inode(inode)) { + err = f2fs_get_encryption_info(inode); + if (err) + return err; + err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); if (err < 0) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 343a8f33..16dead87 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2014,7 +2014,7 @@ int f2fs_decrypt_one(struct inode *, struct page *); void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); /* crypto_key.c */ -void f2fs_free_encryption_info(struct inode *); +void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *); int _f2fs_get_encryption_info(struct inode *inode); /* crypto_fname.c */ @@ -2049,7 +2049,6 @@ static inline int f2fs_get_encryption_info(struct inode *inode) return 0; } -int f2fs_setup_fname_crypto(struct inode *); void f2fs_fname_crypto_free_buffer(struct f2fs_str *); int f2fs_fname_setup_filename(struct inode *, const struct qstr *, int lookup, struct f2fs_filename *); @@ -2063,8 +2062,6 @@ static inline void f2fs_exit_crypto(void) { } static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } static inline int f2fs_get_encryption_info(struct inode *i) { return 0; } - -static inline int f2fs_setup_fname_crypto(struct inode *i) { return 0; } static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { } static inline int f2fs_fname_setup_filename(struct inode *dir, diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index e33cec9c..be59d919 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -75,13 +75,11 @@ struct f2fs_encryption_key { } __attribute__((__packed__)); struct f2fs_crypt_info { - unsigned char ci_size; char ci_data_mode; char ci_filename_mode; char ci_flags; struct crypto_ablkcipher *ci_ctfm; struct key *ci_keyring_key; - char ci_raw[F2FS_MAX_KEY_SIZE]; char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE]; }; @@ -90,7 +88,6 @@ struct f2fs_crypt_info { #define F2FS_WRITE_PATH_FL 0x00000004 struct f2fs_crypto_ctx { - struct crypto_tfm *tfm; /* Crypto API context */ union { struct { struct page *bounce_page; /* Ciphertext page */ @@ -103,7 +100,6 @@ struct f2fs_crypto_ctx { struct list_head free_list; /* Free list */ }; char flags; /* Flags */ - char mode; /* Encryption mode for tfm */ }; struct f2fs_completion_result { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 30d2db9f..f8e6cf6d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -363,7 +363,7 @@ void f2fs_evict_inode(struct inode *inode) out_clear: #ifdef CONFIG_F2FS_FS_ENCRYPTION if (F2FS_I(inode)->i_crypt_info) - f2fs_free_encryption_info(inode); + f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info); #endif clear_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2c805cd1..56d7bbac 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -367,7 +367,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (err) goto err_out; - err = f2fs_setup_fname_crypto(inode); + err = f2fs_get_encryption_info(inode); if (err) goto err_out; @@ -664,7 +664,7 @@ static void *f2fs_encrypted_follow_link(struct dentry *dentry, u32 max_size = inode->i_sb->s_blocksize; int res; - res = f2fs_setup_fname_crypto(inode); + res = f2fs_get_encryption_info(inode); if (res) return ERR_PTR(res); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d4cd04dc..8b5f5fb8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -449,7 +449,8 @@ static int f2fs_drop_inode(struct inode *inode) #ifdef CONFIG_F2FS_FS_ENCRYPTION if (F2FS_I(inode)->i_crypt_info) - f2fs_free_encryption_info(inode); + f2fs_free_encryption_info(inode, + F2FS_I(inode)->i_crypt_info); #endif spin_lock(&inode->i_lock); } From ea2177c0f2b72345bf17693e7f2049dc8743f08b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 May 2015 18:03:38 +0800 Subject: [PATCH 296/321] f2fs: avoid duplicated code by reusing f2fs_read_end_io This patch tries to clean up code because part code of f2fs_read_end_io and mpage_end_io are the same, so it's better to merge and reuse them. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 06f65b20..cd003b88 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -35,29 +35,6 @@ static void f2fs_read_end_io(struct bio *bio, int err) struct bio_vec *bvec; int i; - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (!err) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); - } - bio_put(bio); -} - -/* - * I/O completion handler for multipage BIOs. - * copied from fs/mpage.c - */ -static void mpage_end_io(struct bio *bio, int err) -{ - struct bio_vec *bv; - int i; - if (f2fs_bio_encrypted(bio)) { if (err) { f2fs_release_crypto_ctx(bio->bi_private); @@ -67,8 +44,8 @@ static void mpage_end_io(struct bio *bio, int err) } } - bio_for_each_segment_all(bv, bio, i) { - struct page *page = bv->bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (!err) { SetPageUptodate(page); @@ -78,7 +55,6 @@ static void mpage_end_io(struct bio *bio, int err) } unlock_page(page); } - bio_put(bio); } @@ -123,7 +99,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, bio->bi_bdev = sbi->sb->s_bdev; bio->bi_sector = SECTOR_FROM_BLOCK(blk_addr); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; - bio->bi_private = sbi; + bio->bi_private = is_read ? NULL : sbi; return bio; } @@ -1585,7 +1561,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, } bio->bi_bdev = bdev; bio->bi_sector = SECTOR_FROM_BLOCK(block_nr); - bio->bi_end_io = mpage_end_io; + bio->bi_end_io = f2fs_read_end_io; bio->bi_private = ctx; } From bd0d61af0880ae319a2df5d3aff708e722c7da54 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 May 2015 18:09:03 +0800 Subject: [PATCH 297/321] f2fs crypto: allow setting encryption policy once This patch add XATTR_CREATE flag in setxattr when setting encryption context for inode. Without this flag the context could be set more than once, this should never happen. So, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c index bef254b2..30b0b73d 100644 --- a/fs/f2fs/crypto_policy.c +++ b/fs/f2fs/crypto_policy.c @@ -83,7 +83,7 @@ static int f2fs_create_encryption_context_from_policy( return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), NULL, 0); + sizeof(ctx), NULL, XATTR_CREATE); } int f2fs_process_policy(const struct f2fs_encryption_policy *policy, @@ -202,5 +202,5 @@ int f2fs_inherit_context(struct inode *parent, struct inode *child, get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), ipage, 0); + sizeof(ctx), ipage, XATTR_CREATE); } From 01be9b95b7465d5c009ffbe0c398622398426dcd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 May 2015 15:27:49 +0800 Subject: [PATCH 298/321] f2fs crypto: do not set encryption policy for non-directory by ioctl Encryption policy should only be set to an empty directory through ioctl, This patch add a judgement condition to verify type of the target inode to avoid incorrectly configuring for non-directory. Additionally, remove unneeded inline data conversion since regular or symlink file should not be processed here. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto_policy.c | 3 +++ fs/f2fs/file.c | 6 ------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c index 30b0b73d..d4a96af5 100644 --- a/fs/f2fs/crypto_policy.c +++ b/fs/f2fs/crypto_policy.c @@ -92,6 +92,9 @@ int f2fs_process_policy(const struct f2fs_encryption_policy *policy, if (policy->version != 0) return -EINVAL; + if (!S_ISDIR(inode->i_mode)) + return -EINVAL; + if (!f2fs_inode_has_encryption_context(inode)) { if (!f2fs_empty_dir(inode)) return -ENOTEMPTY; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d5646a77..cddb2aeb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1413,12 +1413,6 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) sizeof(policy))) return -EFAULT; - if (f2fs_has_inline_data(inode)) { - int ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } - return f2fs_process_policy(&policy, inode); #else return -EOPNOTSUPP; From 46536933ad2cbda147c2421eb3574ea00a2dc249 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 27 May 2015 19:51:42 -0700 Subject: [PATCH 299/321] f2fs crypto: avoid f2fs_inherit_context for symlink This patch fixes to call f2fs_inherit_context twice for newly created symlink. The original one is called by f2fs_add_link(), which invokes f2fs_setxattr. If the second one is called again, f2fs_setxattr is triggered again with same encryption index. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 56d7bbac..eaca4e2e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -363,10 +363,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (f2fs_encrypted_inode(dir)) { struct qstr istr = QSTR_INIT(symname, len); - err = f2fs_inherit_context(dir, inode, NULL); - if (err) - goto err_out; - err = f2fs_get_encryption_info(inode); if (err) goto err_out; From e8ae32c5ef9e9f22be13d94d88c71cb101a9616b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 May 2015 17:06:40 -0700 Subject: [PATCH 300/321] f2fs crypto: clean up error handling in f2fs_fname_setup_filename Sync with: ext4 crypto: clean up error handling in ext4_fname_setup_filename Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto_fname.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index 81852cca..ab377d49 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -392,15 +392,13 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, return ret; ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf); if (ret < 0) - goto out; + goto errout; fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; return 0; } - if (!lookup) { - ret = -EACCES; - goto out; - } + if (!lookup) + return -EACCES; /* We don't have the key and we are doing a lookup; decode the * user-supplied name @@ -408,19 +406,17 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, if (iname->name[0] == '_') bigname = 1; if ((bigname && (iname->len != 33)) || - (!bigname && (iname->len > 43))) { - ret = -ENOENT; - } + (!bigname && (iname->len > 43))) + return -ENOENT; + fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); - if (fname->crypto_buf.name == NULL) { - ret = -ENOMEM; - goto out; - } + if (fname->crypto_buf.name == NULL) + return -ENOMEM; ret = digest_decode(iname->name + bigname, iname->len - bigname, fname->crypto_buf.name); if (ret < 0) { ret = -ENOENT; - goto out; + goto errout; } fname->crypto_buf.len = ret; if (bigname) { @@ -430,7 +426,7 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, fname->disk_name.len = fname->crypto_buf.len; } return 0; -out: +errout: f2fs_fname_crypto_free_buffer(&fname->crypto_buf); return ret; } From 7758ba05f60a0c5672b724a827f07d1f5ad23737 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 May 2015 18:19:17 -0700 Subject: [PATCH 301/321] f2fs: fix a deadlock for summary page lock vs. sentry_lock In f2fs_gc: In f2fs_replace_block: - lock_page(sum_page) - check_valid_map() - mutex_lock(sentry_lock) - mutex_lock(sentry_lock) - change_curseg() - lock_page(sum_page) This patch fixes the deadlock condition. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 43354cb3..e1e73617 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -750,6 +750,15 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, sum = page_address(sum_page); + /* + * this is to avoid deadlock: + * - lock_page(sum_page) - f2fs_replace_block + * - check_valid_map() - mutex_lock(sentry_lock) + * - mutex_lock(sentry_lock) - change_curseg() + * - lock_page(sum_page) + */ + unlock_page(sum_page); + switch (GET_SUM_TYPE((&sum->footer))) { case SUM_TYPE_NODE: gc_node_segment(sbi, sum->entries, segno, gc_type); @@ -763,7 +772,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type); stat_inc_call_count(sbi->stat_info); - f2fs_put_page(sum_page, 1); + f2fs_put_page(sum_page, 0); } int f2fs_gc(struct f2fs_sb_info *sbi) From 1dd4092841cca92b7ed2babbb75167fb57d8b4de Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Jun 2015 12:39:30 -0700 Subject: [PATCH 302/321] f2fs crypto: remove alloc_page for Kinzie_page We don't need to call alloc_page() prior to mempool_alloc(), since the mempool_alloc() calls alloc_page() internally. And, if __GFP_WAIT is set, it never fails on page allocation, so let's give GFP_NOWAIT and handle ENOMEM by writepage(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 33 ++++++++++++--------------------- fs/f2fs/f2fs_crypto.h | 3 +-- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 2c7819ae..be6af181 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -83,10 +83,7 @@ void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) unsigned long flags; if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) { - if (ctx->flags & F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(ctx->w.bounce_page); - else - mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool); + mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool); ctx->w.bounce_page = NULL; } ctx->w.control_page = NULL; @@ -408,34 +405,28 @@ struct page *f2fs_encrypt(struct inode *inode, return (struct page *)ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_page(GFP_NOFS); + ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT); if (!ciphertext_page) { - /* - * This is a potential bottleneck, but at least we'll have - * forward progress. - */ - ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, - GFP_NOFS); - if (WARN_ON_ONCE(!ciphertext_page)) - ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, - GFP_NOFS | __GFP_WAIT); - ctx->flags &= ~F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; - } else { - ctx->flags |= F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; + err = -ENOMEM; + goto err_out; } + ctx->flags |= F2FS_WRITE_PATH_FL; ctx->w.bounce_page = ciphertext_page; ctx->w.control_page = plaintext_page; err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, plaintext_page, ciphertext_page); - if (err) { - f2fs_release_crypto_ctx(ctx); - return ERR_PTR(err); - } + if (err) + goto err_out; + SetPagePrivate(ciphertext_page); set_page_private(ciphertext_page, (unsigned long)ctx); lock_page(ciphertext_page); return ciphertext_page; + +err_out: + f2fs_release_crypto_ctx(ctx); + return ERR_PTR(err); } /** diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index be59d919..c2c1c2b6 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -84,8 +84,7 @@ struct f2fs_crypt_info { }; #define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define F2FS_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002 -#define F2FS_WRITE_PATH_FL 0x00000004 +#define F2FS_WRITE_PATH_FL 0x00000002 struct f2fs_crypto_ctx { union { From 793dfd5725d31f26e0786c0839aa9f3dff6e5e27 Mon Sep 17 00:00:00 2001 From: Chenxi Mao Date: Sat, 30 May 2015 22:56:46 +0800 Subject: [PATCH 303/321] f2fs: disable the discard option when device doesn't support Current f2fs check the whether the blk device can support discard. However, the code will cause the discard option cannot be enabled. Because the clear_opt(sbi, DISCARD) will be invoked forever. This patch can fix this issue. Jaegeuk Kim: The original patch was intended to disable the discard option when device does not support trim command. Rather than remaining the buggy patch, let's replace with this patch as an integrated one. Signed-off-by: Chenxi Mao Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8b5f5fb8..f0be6a62 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1237,10 +1237,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (test_opt(sbi, DISCARD)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) + if (!blk_queue_discard(q)) { f2fs_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but " "the device does not support discard"); + clear_opt(sbi, DISCARD); + } } sbi->s_kobj.kset = f2fs_kset; From a6f6f7ee2005e72f414efd5f627c823c828a5acf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 28 May 2015 19:15:35 +0800 Subject: [PATCH 304/321] f2fs: hide common code in f2fs_replace_block This patch clean up codes through: 1.rename f2fs_replace_block to __f2fs_replace_block(). 2.introduce new f2fs_replace_block() to include __f2fs_replace_block() and some common related codes around __f2fs_replace_block(). Then, newly introduced function f2fs_replace_block can be used by following patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/file.c | 12 ++---------- fs/f2fs/recovery.c | 9 ++------- fs/f2fs/segment.c | 18 +++++++++++++++++- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 16dead87..843d658d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1694,8 +1694,8 @@ void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(unsigned int, struct f2fs_io_info *); void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); void rewrite_data_page(struct f2fs_io_info *); -void f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *, - block_t, block_t, bool); +void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *, + block_t, block_t, unsigned char, bool); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cddb2aeb..e4b31efa 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -871,18 +871,10 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) set_data_blkaddr(&dn); } else if (new_addr != NEW_ADDR) { struct node_info ni; - struct f2fs_summary sum; get_node_info(sbi, dn.nid, &ni); - set_summary(&sum, dn.nid, dn.ofs_in_node, - ni.version); - - f2fs_replace_block(sbi, &sum, old_addr, - new_addr, true); - - dn.data_blkaddr = new_addr; - set_data_blkaddr(&dn); - f2fs_update_extent_cache(&dn); + f2fs_replace_block(sbi, &dn, old_addr, new_addr, + ni.version, true); } f2fs_put_dnode(&dn); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index cb400eef..bae8b8c2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -360,7 +360,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int start, end; struct dnode_of_data dn; - struct f2fs_summary sum; struct node_info ni; int err = 0, recovered = 0; @@ -420,13 +419,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (err) goto err; - set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); - /* write dummy data page */ - f2fs_replace_block(sbi, &sum, src, dest, false); - dn.data_blkaddr = dest; - set_data_blkaddr(&dn); - f2fs_update_extent_cache(&dn); + f2fs_replace_block(sbi, &dn, src, dest, + ni.version, false); recovered++; } dn.ofs_in_node++; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bcaf658c..f2a0a9b4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1361,7 +1361,8 @@ void rewrite_data_page(struct f2fs_io_info *fio) f2fs_submit_page_mbio(fio); } -void f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static void __f2fs_replace_block(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg) { @@ -1421,6 +1422,21 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, mutex_unlock(&curseg->curseg_mutex); } +void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, + block_t old_addr, block_t new_addr, + unsigned char version, bool recover_curseg) +{ + struct f2fs_summary sum; + + set_summary(&sum, dn->nid, dn->ofs_in_node, version); + + __f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg); + + dn->data_blkaddr = new_addr; + set_data_blkaddr(dn); + f2fs_update_extent_cache(dn); +} + static inline bool is_merged_page(struct f2fs_sb_info *sbi, struct page *page, enum page_type type) { From e8a5fb170c42a6a8b03500c7eb4f949dbcbc2330 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 28 May 2015 19:16:57 +0800 Subject: [PATCH 305/321] f2fs: support FALLOC_FL_INSERT_RANGE FALLOC_FL_INSERT_RANGE flag for ->fallocate was introduced in commit dd46c787788d ("fs: Add support FALLOC_FL_INSERT_RANGE for fallocate"). The effect of FALLOC_FL_INSERT_RANGE command is the opposite of FALLOC_FL_COLLAPSE_RANGE, if this command was performed, all data from offset to EOF in our file will be shifted to right as given length, and then range [offset, offset + length] becomes a hole. This command is useful for our user who wants to add some data in the middle of the file, for example: video/music editor will insert a keyframe in specified position of media file, with this command we can easily create a hole for inserting without removing original data. This patch introduces f2fs_insert_range() to support FALLOC_FL_INSERT_RANGE. Change-Id: Ie77dd7e87d9c40c531b1c8dbb2b3cce2da74bbe1 Signed-off-by: Chao Yu Signed-off-by: Yuan Zhong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e4b31efa..2cfebc4f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1027,6 +1027,100 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, return ret; } +static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t pg_start, pg_end, delta, nrpages, idx; + loff_t new_size; + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + new_size = i_size_read(inode) + len; + if (new_size > inode->i_sb->s_maxbytes) + return -EFBIG; + + if (offset >= i_size_read(inode)) + return -EINVAL; + + /* insert range should be aligned to block size of f2fs. */ + if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + f2fs_balance_fs(sbi); + + ret = truncate_blocks(inode, i_size_read(inode), true); + if (ret) + return ret; + + /* write out all dirty pages from offset */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); + if (ret) + return ret; + + truncate_pagecache(inode, 0, offset); + + pg_start = offset >> PAGE_CACHE_SHIFT; + pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + delta = pg_end - pg_start; + nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + + for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) { + struct dnode_of_data dn; + struct page *ipage; + block_t new_addr, old_addr; + + f2fs_lock_op(sbi); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, idx, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) { + goto out; + } else if (ret == -ENOENT) { + goto next; + } else if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_dnode(&dn); + goto next; + } else { + new_addr = dn.data_blkaddr; + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, idx + delta); + if (ret) + goto out; + + old_addr = dn.data_blkaddr; + f2fs_bug_on(sbi, old_addr != NEW_ADDR); + + if (new_addr != NEW_ADDR) { + struct node_info ni; + + get_node_info(sbi, dn.nid, &ni); + f2fs_replace_block(sbi, &dn, old_addr, new_addr, + ni.version, true); + } + f2fs_put_dnode(&dn); +next: + f2fs_unlock_op(sbi); + } + + i_size_write(inode, new_size); + return 0; +out: + f2fs_unlock_op(sbi); + return ret; +} + static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { @@ -1090,6 +1184,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, #define FALLOC_FL_COLLAPSE_RANGE 0X08 #define FALLOC_FL_ZERO_RANGE 0X10 +#define FALLOC_FL_INSERT_RANGE 0X20 static long f2fs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) @@ -1097,11 +1192,13 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; - if (f2fs_encrypted_inode(inode) && (mode & FALLOC_FL_COLLAPSE_RANGE)) + if (f2fs_encrypted_inode(inode) && + (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | + FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); @@ -1115,6 +1212,8 @@ static long f2fs_fallocate(struct file *file, int mode, ret = f2fs_collapse_range(inode, offset, len); } else if (mode & FALLOC_FL_ZERO_RANGE) { ret = f2fs_zero_range(inode, offset, len, mode); + } else if (mode & FALLOC_FL_INSERT_RANGE) { + ret = f2fs_insert_range(inode, offset, len); } else { ret = expand_inode_data(inode, offset, len, mode); } From 670bbea9c14c2b9d8287a3c13f52e117db3e1179 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Jun 2015 15:48:20 -0700 Subject: [PATCH 306/321] f2fs: fix to return exact trimmed size Now, we add all the candidates for trim commands and then finally issue discard commands. So, we should count the trimmed size in back-end. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f2a0a9b4..177c37ab 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -603,7 +603,6 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, list_add_tail(&new->list, head); done: SM_I(sbi)->nr_discards += end - start; - cpc->trimmed += end - start; } static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) @@ -705,6 +704,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); + cpc->trimmed += entry->len; skip: list_del(&entry->list); SM_I(sbi)->nr_discards -= entry->len; From f9597e28b6363c6a88dacd9c3537b748d28cec30 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 11 Jun 2015 14:23:05 -0700 Subject: [PATCH 307/321] f2fs crypto: fix to handle errors likewise ext4 This patch makes some error handling policies same with ext4. Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index be6af181..75d62fff 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -112,7 +112,7 @@ struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; if (ci == NULL) - return ERR_PTR(-EACCES); + return ERR_PTR(-ENOKEY); /* * We first try getting the ctx from a free list because in @@ -457,8 +457,8 @@ int f2fs_decrypt_one(struct inode *inode, struct page *page) struct f2fs_crypto_ctx *ctx = f2fs_get_crypto_ctx(inode); int ret; - if (!ctx) - return -ENOMEM; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = f2fs_decrypt(ctx, page); f2fs_release_crypto_ctx(ctx); return ret; From 875fc5e62804881ee6d2aa7dffbb4be8db1b5dc2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 11 Jun 2015 14:33:29 -0700 Subject: [PATCH 308/321] f2fs crypto: add alloc_Kinzie_page This patch adds alloc_Kinzie_page likewise ext4. Signed-off-by: Jaegeuk Kim --- fs/f2fs/crypto.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 75d62fff..4a62ef14 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -376,6 +376,15 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, return 0; } +static struct page *alloc_bounce_page(struct f2fs_crypto_ctx *ctx) +{ + ctx->w.bounce_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT); + if (ctx->w.bounce_page == NULL) + return ERR_PTR(-ENOMEM); + ctx->flags |= F2FS_WRITE_PATH_FL; + return ctx->w.bounce_page; +} + /** * f2fs_encrypt() - Encrypts a page * @inode: The inode for which the encryption should take place @@ -405,19 +414,17 @@ struct page *f2fs_encrypt(struct inode *inode, return (struct page *)ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT); - if (!ciphertext_page) { - err = -ENOMEM; + ciphertext_page = alloc_bounce_page(ctx); + if (IS_ERR(ciphertext_page)) goto err_out; - } - ctx->flags |= F2FS_WRITE_PATH_FL; - ctx->w.bounce_page = ciphertext_page; ctx->w.control_page = plaintext_page; err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, plaintext_page, ciphertext_page); - if (err) + if (err) { + ciphertext_page = ERR_PTR(err); goto err_out; + } SetPagePrivate(ciphertext_page); set_page_private(ciphertext_page, (unsigned long)ctx); @@ -426,7 +433,7 @@ struct page *f2fs_encrypt(struct inode *inode, err_out: f2fs_release_crypto_ctx(ctx); - return ERR_PTR(err); + return ciphertext_page; } /** From 32a795a4687771bf21d2a5b80f359561ccd22da2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 5 Jun 2015 18:34:02 +0800 Subject: [PATCH 309/321] f2fs: do not trim preallocated blocks when truncating after i_size When we perform generic/092 in xfstests, output is like below: XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) 0: [0..10239]: data 0: [0..10239]: data -1: [10240..20479]: unwritten +1: [10240..14335]: unwritten This is because with this testcase, we redefine the regulation for truncate in perallocated space past i_size as below: "There was some confused about what the fs was supposed to do when you truncate at i_size with preallocated space past i_size. We decided on the following things. 1) truncate(i_size) will trim all blocks past i_size. 2) truncate(x) where x > i_size will not trim all blocks past i_size. " This method is used in xfs, and then ext4/btrfs will follow the rule. This patch fixes to follow the new rule for f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2cfebc4f..3943d961 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -669,16 +669,16 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) f2fs_get_encryption_info(inode)) return -EACCES; - if (attr->ia_size != i_size_read(inode)) { + if (attr->ia_size <= i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); f2fs_balance_fs(F2FS_I_SB(inode)); } else { /* - * giving a chance to truncate blocks past EOF which - * are fallocated with FALLOC_FL_KEEP_SIZE. + * do not trim all blocks after i_size if target size is + * larger than i_size. */ - f2fs_truncate(inode); + truncate_setsize(inode, attr->ia_size); } } From ab84afda4a9ad9644a0b2e962cc899ceda544783 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jun 2015 13:20:10 +0800 Subject: [PATCH 310/321] f2fs: setting discard option in parse_options() For the first mount of f2fs image with realtime discard option, we will disable discard option if device is not supported, but for remount operation, our discard option can still be set, this should be avoided. This patch moves configuring of discard option to parse_options() to fix this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f0be6a62..d54c54f6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -258,6 +258,7 @@ static void init_once(void *foo) static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct request_queue *q; substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; @@ -302,7 +303,14 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; break; case Opt_discard: - set_opt(sbi, DISCARD); + q = bdev_get_queue(sb->s_bdev); + if (blk_queue_discard(q)) { + set_opt(sbi, DISCARD); + } else { + f2fs_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but " + "the device does not support discard"); + } break; case Opt_noheap: set_opt(sbi, NOHEAP); @@ -1235,16 +1243,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) proc_create_data("segment_info", S_IRUGO, sbi->s_proc, &f2fs_seq_segment_info_fops, sb); - if (test_opt(sbi, DISCARD)) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) { - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - clear_opt(sbi, DISCARD); - } - } - sbi->s_kobj.kset = f2fs_kset; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, From 7b8c767615d28d2ae0a441d0eeeb86d252209989 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jun 2015 13:28:03 +0800 Subject: [PATCH 311/321] f2fs: skip committing valid superblock In recovery procedure for superblock, we try to write data of valid superblock into invalid one for recovery, work should be finished here, but then still we will write the valid one with its original data. This operation is not needed. Let's skip doing this unnecessary work. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/super.c | 8 +++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 843d658d..70bc217f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1623,7 +1623,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ -int f2fs_commit_super(struct f2fs_sb_info *); +int f2fs_commit_super(struct f2fs_sb_info *, bool); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3943d961..ef83504a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1549,7 +1549,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); - err = f2fs_commit_super(sbi); + err = f2fs_commit_super(sbi, false); mnt_drop_write_file(filp); if (err) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d54c54f6..a06b0b46 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1043,7 +1043,7 @@ static int read_raw_super_block(struct super_block *sb, return 0; } -int f2fs_commit_super(struct f2fs_sb_info *sbi) +int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *sbh = sbi->raw_super_buf; sector_t block = sbh->b_blocknr; @@ -1055,7 +1055,9 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi) err = sync_dirty_buffer(sbh); sbh->b_blocknr = block; - if (err) + + /* if we are in recovery path, skip writing valid superblock */ + if (recover || err) goto out; /* write current valid superblock */ @@ -1289,7 +1291,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* recover broken superblock */ if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) { f2fs_msg(sb, KERN_INFO, "Recover invalid superblock"); - f2fs_commit_super(sbi); + f2fs_commit_super(sbi, true); } return 0; From 9361ba20f89af76f5d4da7499184ffca2ae2b1bc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 5 Oct 2015 14:49:57 -0700 Subject: [PATCH 312/321] - f2fs: introduce a periodic checkpoint flow This patch introduces a periodic checkpoint feature. Note that, this is not enforcing to conduct checkpoints very strictly in terms of trigger timing, instead just hope to help user experiences. The default value is 60 seconds. Change-Id: Idb9d28f8bb9e8eb8c1078318d0e7bb7267cfd631 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Reviewed-on: http://gerrit.mot.com/793273 SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Submit-Approved: Jira Key Tested-by: Jira Key Reviewed-by: Russell Knize Reviewed-by: Sheng-Zhe Zhao --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 4 +++- fs/f2fs/super.c | 5 +++++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 2c4cc420..e066281d 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -80,3 +80,9 @@ Date: February 2015 Contact: "Jaegeuk Kim" Description: Controls the trimming rate in batch mode. + +What: /sys/fs/f2fs//cp_interval +Date: October 2015 +Contact: "Jaegeuk Kim" +Description: + Controls the checkpoint timing. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f529e896..958890f6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1087,6 +1087,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason == CP_RECOVERY) f2fs_msg(sbi->sb, KERN_NOTICE, "checkpoint: version = %llx", ckpt_ver); + + /* do checkpoint periodically */ + sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval); out: mutex_unlock(&sbi->cp_mutex); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 70bc217f..1a5bddf3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -121,6 +121,7 @@ enum { (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) +#define DEF_CP_INTERVAL 60 /* 60 secs */ struct cp_control { int reason; @@ -715,6 +716,7 @@ struct f2fs_sb_info { struct rw_semaphore node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; + long cp_expires, cp_interval; /* next expected periodic cp */ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 177c37ab..d553a58d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -367,7 +368,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || excess_prefree_segs(sbi) || - !available_free_memory(sbi, INO_ENTRIES)) + !available_free_memory(sbi, INO_ENTRIES) || + jiffies > sbi->cp_expires) f2fs_sync_fs(sbi->sb, true); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a06b0b46..43874d1b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -206,6 +206,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -222,6 +223,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(max_victim_search), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), + ATTR_LIST(cp_interval), NULL, }; @@ -969,6 +971,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; + sbi->cp_interval = DEF_CP_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); } @@ -1294,6 +1297,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_commit_super(sbi, true); } + sbi->cp_expires = round_jiffies_up(jiffies); + return 0; free_kobj: From 2ddc1de1f3ec315c1bc083ce34021d812a653cfb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Oct 2015 18:49:50 -0700 Subject: [PATCH 313/321] - f2fs: fix leakage of inmemory atomic pages If we got failure during commit_atomic_write, abort_volatile_write will be called, but will not drop the inmemory pages due to no FI_ATOMIC_FILE. Actually, there is no reason to check the flag in abort_volatile_write. Change-Id: I0411d0f8a47af81bea2cd09ae576a9778268500d Signed-off-by: Jaegeuk Kim Reviewed-on: http://gerrit.mot.com/793544 SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Submit-Approved: Jira Key Tested-by: Jira Key Reviewed-by: Russell Knize Reviewed-by: Sheng-Zhe Zhao --- fs/f2fs/file.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ef83504a..8c6f5b61 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1407,13 +1407,9 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) f2fs_balance_fs(F2FS_I_SB(inode)); - if (f2fs_is_atomic_file(inode)) { - commit_inmem_pages(inode, false); - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - } - - if (f2fs_is_volatile_file(inode)) - clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + commit_inmem_pages(inode, false); mnt_drop_write_file(filp); return ret; From cb52d5193a37959c4e097249c0bb208eb74caca1 Mon Sep 17 00:00:00 2001 From: Jiangli Yuan Date: Wed, 23 Mar 2016 18:48:34 -0700 Subject: [PATCH 314/321] f2fs: set f2fs garbage collection thread as freezable Set f2fs gc kernel thread as freezable, so it will be frozen during system suspending. Change-Id: I05a06adb5547de7d0b01a49582c6f2f02fcaaf87 Signed-off-by: Jiangli Yuan Reviewed-on: http://gerrit.mot.com/836909 SME-Granted: SME Approvals Granted SLTApproved: Slta Waiver Tested-by: Jira Key Reviewed-by: Zhi-Ming Yuan Submit-Approved: Jira Key --- fs/f2fs/gc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e1e73617..5429843b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -33,13 +33,16 @@ static int gc_thread_func(void *data) wait_ms = gc_th->min_sleep_time; + set_freezable(); do { + + wait_event_interruptible_timeout(*wq, + kthread_should_stop() || freezing(current), + msecs_to_jiffies(wait_ms)); + if (try_to_freeze()) continue; - else - wait_event_interruptible_timeout(*wq, - kthread_should_stop(), - msecs_to_jiffies(wait_ms)); + if (kthread_should_stop()) break; From 0e15efc7ac28c77b412f00dcc0909f9933f4ed0a Mon Sep 17 00:00:00 2001 From: Jiangli Yuan Date: Thu, 12 May 2016 17:00:01 -0700 Subject: [PATCH 315/321] f2fs: Add a ioctl to disable garbage collection It will stop both the background GC and foreground GC. Change-Id: I3152bb1928277a31a2243de2a07c908eea5938dc Signed-off-by: Jiangli Yuan Reviewed-on: http://gerrit.mot.com/856055 SME-Granted: SME Approvals Granted SLTApproved: Slta Waiver Tested-by: Jira Key Reviewed-by: Zhi-Ming Yuan Reviewed-by: Sheng-Zhe Zhao Submit-Approved: Jira Key --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 4 ++++ fs/f2fs/segment.c | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1a5bddf3..d66824cf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -230,6 +230,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ #define FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ #define FS_GOING_DOWN_NOSYNC 0x2 /* going down */ +#define FS_GOING_STOP_GC 0x3 /* stoping all gc */ #define F2FS_IOCTL_MAGIC 0xf5 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) @@ -688,6 +689,7 @@ enum { SBI_IS_CLOSE, /* specify unmounting */ SBI_NEED_FSCK, /* need fsck.f2fs to fix */ SBI_POR_DOING, /* recovery is doing or not */ + SBI_NO_GC, /* disable f2fs gc */ }; struct f2fs_sb_info { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8c6f5b61..ca79cfd2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1444,6 +1444,10 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) case FS_GOING_DOWN_NOSYNC: f2fs_stop_checkpoint(sbi); break; + case FS_GOING_STOP_GC: + stop_gc_thread(sbi); + set_sbi_flag(sbi, SBI_NO_GC); + break; default: return -EINVAL; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d553a58d..76f20d48 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -354,7 +354,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. */ - if (has_not_enough_free_secs(sbi, 0)) { + if (has_not_enough_free_secs(sbi, 0) && + !is_sbi_flag_set(sbi, SBI_NO_GC)) { mutex_lock(&sbi->gc_mutex); f2fs_gc(sbi); } From e7fb32155cc097f07c1acc39dd0c759f1c9462ee Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 22 Jun 2016 19:47:04 -0500 Subject: [PATCH 316/321] f2fs: add a max block check for get_data_block_bmap This patch adds a max block check for get_data_block_bmap. Tirinity test program will send a block number as parameter into ioctl_fibmap, which will be used in get_node_path(), when the block number large than f2fs max blocks, it will trigger kernel bug. Miot-CRs-fixed: (CR) Signed-off-by: Yunlei He Signed-off-by: Xue Liu [Jaegeuk Kim: fix missing condition, pointed by Chao Yu] Signed-off-by: Jaegeuk Kim (ported from 3.18 kernel) Signed-off-by: Yi-wei Zhao Change-Id: Idd2160b402a640652158ec7502f2d6e7194062f0 Reviewed-on: http://gerrit.mot.com/869829 SME-Granted: SME Approvals Granted SLTApproved: Slta Waiver Tested-by: Jira Key Reviewed-by: Zhi-Ming Yuan Submit-Approved: Jira Key (cherry picked from commit e1683b51f721321ea3bfd27ba4f9a6a384c00805) Reviewed-on: http://gerrit.mot.com/873399 Reviewed-by: Anandappan Chakravarthy SLTApproved: Harsha Hosakoppa Rudresha --- fs/f2fs/data.c | 4 ++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cd003b88..eb69fca3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2094,6 +2094,10 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) if (err) return err; } + /* Block number less than F2FS MAX BLOCKS */ + if (unlikely(block >= max_file_size(0))) + return -EFBIG; + return generic_block_bmap(mapping, block, get_data_block); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d66824cf..4bb14c08 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1628,6 +1628,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) * super.c */ int f2fs_commit_super(struct f2fs_sb_info *, bool); +loff_t max_file_size(unsigned bits); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 43874d1b..c5512df0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -851,7 +851,7 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static loff_t max_file_size(unsigned bits) +loff_t max_file_size(unsigned bits) { loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; From 6728865def0b165881706ed2b8fa5e70750124be Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Jun 2016 20:25:01 -0500 Subject: [PATCH 317/321] f2fs: introduce max_file_blocks in sbi Introduce max_file_blocks in sbi to store max block index of file in f2fs, it could be used to avoid unneeded calculation of max block index in runtime. Mot-CRs-fixed: (CR) Signed-off-by: Chao Yu [Jaegeuk Kim: fix overflow of sbi->max_file_blocks] Signed-off-by: Jaegeuk Kim (port from 3.18 kernel) Signed-off-by: Yi-wei Zhao Change-Id: Ib937ec73feda9cdb651787a4088511bee64d875f Reviewed-on: http://gerrit.mot.com/869830 SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Tested-by: Jira Key Reviewed-by: Zhi-Ming Yuan Submit-Approved: Jira Key (cherry picked from commit 7fe9634bc9f4f2ac53298b63642b022470e8a4ff) Reviewed-on: http://gerrit.mot.com/874164 Reviewed-by: Navya Vijayan SLTApproved: Harsha Hosakoppa Rudresha --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index eb69fca3..eb043c30 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2095,7 +2095,7 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) return err; } /* Block number less than F2FS MAX BLOCKS */ - if (unlikely(block >= max_file_size(0))) + if (unlikely(block >= F2FS_I_SB(inode)->max_file_blocks)) return -EFBIG; return generic_block_bmap(mapping, block, get_data_block); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4bb14c08..01024efe 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -752,6 +752,7 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ unsigned int total_valid_inode_count; /* valid inode count */ + loff_t max_file_blocks; /* max block index of file */ int active_logs; /* # of active logs */ int dir_level; /* directory level */ @@ -1628,7 +1629,6 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) * super.c */ int f2fs_commit_super(struct f2fs_sb_info *, bool); -loff_t max_file_size(unsigned bits); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c5512df0..75f8fe9a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -851,7 +851,7 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -loff_t max_file_size(unsigned bits) +static loff_t max_file_blocks(void) { loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; @@ -867,7 +867,6 @@ loff_t max_file_size(unsigned bits) leaf_count *= NIDS_PER_BLOCK; result += leaf_count; - result <<= bits; return result; } @@ -1117,7 +1116,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_options; - sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); + sbi->max_file_blocks = max_file_blocks(); + sb->s_maxbytes = sbi->max_file_blocks << + le32_to_cpu(raw_super->log_blocksize); sb->s_max_links = F2FS_LINK_MAX; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); From 1a144c6d87fdfe08fb6c36569ce02e66492978d5 Mon Sep 17 00:00:00 2001 From: Lianwei Wang Date: Thu, 23 Jun 2016 21:39:22 -0500 Subject: [PATCH 318/321] f2fs: support emulated case-insensitive sd media device SD card is case-insensitive file system. And all the file under media will be mount to PC with MTP protocol which is case-insensitive as well. Mot-CRs-fixed: (CR) Change-Id: If3be26d043213e5c21be3c80ca70aff21a8411d4 Signed-off-by: Lianwei Wang Reviewed-on: http://gerrit.mot.com/891999 SME-Granted: SME Approvals Granted SLTApproved: Slta Waiver Tested-by: Jira Key Reviewed-by: Zhi-Ming Yuan Submit-Approved: Jira Key --- fs/f2fs/namei.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index eaca4e2e..0c8da66f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -23,6 +23,76 @@ #include "acl.h" #include +/* dcache dops */ + +#define MEDIA_NAME "media" +#define MEDIA_NAME_LEN 5 + +static unsigned int __f2fs_striptail_len(unsigned int len, const char *name) +{ + while (len && name[len - 1] == '.') + len--; + return len; +} + +static unsigned int f2fs_striptail_len(const struct qstr *qstr) +{ + return __f2fs_striptail_len(qstr->len, qstr->name); +} + +static int f2fs_d_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) +{ + const unsigned char *name; + unsigned int len; + unsigned long hash; + + name = qstr->name; + len = f2fs_striptail_len(qstr); + + hash = init_name_hash(); + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + qstr->hash = end_name_hash(hash); + + return 0; +} + +static int f2fs_d_compare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) +{ + unsigned int alen, blen; + + /* A filename cannot end in '.' or we treat it like it has none */ + alen = f2fs_striptail_len(name); + blen = __f2fs_striptail_len(len, str); + if (alen == blen) { + if (strncasecmp(name->name, str, alen) == 0) + return 0; + } + return 1; +} + +const struct dentry_operations f2fs_dops = { + .d_hash = f2fs_d_hash, + .d_compare = f2fs_d_compare, +}; + + +static void f2fs_set_d_dops(struct dentry *dentry) +{ + if (!strncmp(dentry->d_name.name, MEDIA_NAME, MEDIA_NAME_LEN) && + dentry->d_name.len == MEDIA_NAME_LEN && + dentry->d_parent && + !strncmp(dentry->d_parent->d_name.name, "/", 1) && + dentry->d_parent->d_name.len == 1) { + d_set_d_op(dentry, &f2fs_dops); + } else if (dentry->d_parent && dentry->d_parent->d_op == &f2fs_dops) { + d_set_d_op(dentry, dentry->d_parent->d_op); + } +} + static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -249,6 +319,10 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + f2fs_set_d_dops(dentry); + if (dentry->d_op == &f2fs_dops) + flags |= LOOKUP_NOCASE; + de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); if (!de) return d_splice_alias(inode, dentry); From 63f3c39cab1ac43b066203aacf6beb185c605b1c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Apr 2015 11:03:48 -0700 Subject: [PATCH 319/321] f2fs: fix wrong error hanlder in f2fs_follow_link The page_follow_link_light returns NULL and its error pointer was remained in nd->path. Mot-CRs-fixed: (CR) Change-Id: Ie360f3d07b7c433926c89a52e307e0a834ef19df Reported-by: Dan Carpenter Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Reviewed-on: http://gerrit.mot.com/892000 SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Tested-by: Jira Key Reviewed-by: Sheng-Zhe Zhao Reviewed-by: Zhi-Ming Yuan Submit-Approved: Jira Key --- fs/f2fs/namei.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0c8da66f..b82058a5 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -385,16 +385,14 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct page *page; + struct page *page = page_follow_link_light(dentry, nd); - page = page_follow_link_light(dentry, nd); - if (IS_ERR(page)) + if (IS_ERR_OR_NULL(page)) return page; /* this is broken symlink case */ if (*nd_get_link(nd) == 0) { - kunmap(page); - page_cache_release(page); + page_put_link(dentry, nd, page); return ERR_PTR(-ENOENT); } return page; From a984982cb52818df94b8880de8d7d57e1024bf61 Mon Sep 17 00:00:00 2001 From: Lianwei Wang Date: Fri, 1 Jul 2016 12:15:12 -0500 Subject: [PATCH 320/321] f2fs: support case-insensitive from xattr The case-insensitive is enabled by setting nocase xattr from userspace. Mot-CRs-fixed: (CR) Change-Id: I9d43debd28b5c99dc7dd0a7fbe0b16fb52978d3e Signed-off-by: Lianwei Wang Reviewed-on: http://gerrit.mot.com/892001 SLTApproved: Slta Waiver SME-Granted: SME Approvals Granted Tested-by: Jira Key Reviewed-by: Zhi-Ming Yuan Submit-Approved: Jira Key --- fs/f2fs/f2fs.h | 1 + fs/f2fs/namei.c | 44 ++++++++++++++++++++++++++++---------------- fs/f2fs/xattr.c | 4 ++++ fs/f2fs/xattr.h | 4 ++++ 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 01024efe..29cd155c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1585,6 +1585,7 @@ void handle_failed_inode(struct inode *); * namei.c */ struct dentry *f2fs_get_parent(struct dentry *child); +void f2fs_set_nocase_dop(struct inode *inode); /* * dir.c diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b82058a5..18238f8e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -24,10 +24,6 @@ #include /* dcache dops */ - -#define MEDIA_NAME "media" -#define MEDIA_NAME_LEN 5 - static unsigned int __f2fs_striptail_len(unsigned int len, const char *name) { while (len && name[len - 1] == '.') @@ -79,17 +75,23 @@ const struct dentry_operations f2fs_dops = { .d_compare = f2fs_d_compare, }; - -static void f2fs_set_d_dops(struct dentry *dentry) +void f2fs_set_nocase_dop(struct inode *inode) { - if (!strncmp(dentry->d_name.name, MEDIA_NAME, MEDIA_NAME_LEN) && - dentry->d_name.len == MEDIA_NAME_LEN && - dentry->d_parent && - !strncmp(dentry->d_parent->d_name.name, "/", 1) && - dentry->d_parent->d_name.len == 1) { - d_set_d_op(dentry, &f2fs_dops); - } else if (dentry->d_parent && dentry->d_parent->d_op == &f2fs_dops) { - d_set_d_op(dentry, dentry->d_parent->d_op); + struct dentry *dentry; + + /* only dir can be set */ + if (!S_ISDIR(inode->i_mode)) + return; + + /* dir inode have one alias at most */ + dentry = d_find_alias(inode); + + if (dentry) { + if (!dentry->d_op) { + shrink_dcache_parent(dentry); + d_set_d_op(dentry, &f2fs_dops); + } + dput(dentry); } } @@ -319,8 +321,10 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - f2fs_set_d_dops(dentry); - if (dentry->d_op == &f2fs_dops) + if (!dentry->d_op && dentry->d_parent && dentry->d_parent->d_op) + d_set_d_op(dentry, dentry->d_parent->d_op); + + if (dentry->d_op) flags |= LOOKUP_NOCASE; de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); @@ -340,6 +344,14 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (err) goto err_out; } + + if (S_ISDIR(inode->i_mode) && !dentry->d_op) { + err = f2fs_getxattr(inode, F2FS_XATTR_INDEX_USER, + F2FS_XATTR_DIR_NOCASE, NULL, 0, NULL); + if (err > 0) + d_set_d_op(dentry, &f2fs_dops); + } + return d_splice_alias(inode, dentry); err_out: diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 43da08a3..e9a09fae 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -591,6 +591,10 @@ static int __f2fs_setxattr(struct inode *inode, int index, update_inode(inode, ipage); else update_inode_page(inode); + + if (S_ISDIR(inode->i_mode) && len == F2FS_XATTR_DIR_NOCASE_LEN && + !memcmp(name, F2FS_XATTR_DIR_NOCASE, len)) + f2fs_set_nocase_dop(inode); exit: kzfree(base_addr); return error; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 47cf0e58..623082f7 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -40,6 +40,10 @@ #define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" +/* user.nocase xattribute for case-insensitive dir support */ +#define F2FS_XATTR_DIR_NOCASE "nocase" +#define F2FS_XATTR_DIR_NOCASE_LEN 6 + struct f2fs_xattr_header { __le32 h_magic; /* magic number for identification */ __le32 h_refcount; /* reference count */ From d2fe08f11fc18a28df1923c17eacfcca163c1ce7 Mon Sep 17 00:00:00 2001 From: Lianwei Wang Date: Wed, 24 Aug 2016 20:15:59 -0500 Subject: [PATCH 321/321] f2fs: make case-insensitive feature configurable Make it configurable on 3.10 kernel to not impact on the other devices. Mot-CRs-fixed: (CR) Change-Id: Icb13a77f479ffd3f96e1330894f27db700d5c7cc Signed-off-by: Lianwei Wang Reviewed-on: http://gerrit.mot.com/892384 SME-Granted: SME Approvals Granted SLTApproved: Slta Waiver Tested-by: Jira Key Reviewed-by: Zhi-Ming Yuan Submit-Approved: Jira Key --- fs/f2fs/Kconfig | 9 +++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/namei.c | 6 ++++++ fs/f2fs/xattr.c | 3 +++ fs/f2fs/xattr.h | 2 ++ 5 files changed, 22 insertions(+) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 363abf9e..61765d23 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -100,3 +100,12 @@ config F2FS_IO_TRACE information and block IO patterns in the filesystem level. If unsure, say N. + +config F2FS_EMULATED_SD + bool "F2FS emulated SD" + depends on F2FS_FS + depends on F2FS_FS_XATTR + help + Enable emulated SD card on f2fs. + + If unsure, say N. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 29cd155c..c604f8ef 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1585,7 +1585,9 @@ void handle_failed_inode(struct inode *); * namei.c */ struct dentry *f2fs_get_parent(struct dentry *child); +#ifdef CONFIG_F2FS_EMULATED_SD void f2fs_set_nocase_dop(struct inode *inode); +#endif /* * dir.c diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 18238f8e..c4fc9b32 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -23,6 +23,7 @@ #include "acl.h" #include +#ifdef CONFIG_F2FS_EMULATED_SD /* dcache dops */ static unsigned int __f2fs_striptail_len(unsigned int len, const char *name) { @@ -94,6 +95,7 @@ void f2fs_set_nocase_dop(struct inode *inode) dput(dentry); } } +#endif static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) { @@ -321,11 +323,13 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); +#ifdef CONFIG_F2FS_EMULATED_SD if (!dentry->d_op && dentry->d_parent && dentry->d_parent->d_op) d_set_d_op(dentry, dentry->d_parent->d_op); if (dentry->d_op) flags |= LOOKUP_NOCASE; +#endif de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); if (!de) @@ -345,12 +349,14 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto err_out; } +#ifdef CONFIG_F2FS_EMULATED_SD if (S_ISDIR(inode->i_mode) && !dentry->d_op) { err = f2fs_getxattr(inode, F2FS_XATTR_INDEX_USER, F2FS_XATTR_DIR_NOCASE, NULL, 0, NULL); if (err > 0) d_set_d_op(dentry, &f2fs_dops); } +#endif return d_splice_alias(inode, dentry); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index e9a09fae..93dd554b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -592,9 +592,12 @@ static int __f2fs_setxattr(struct inode *inode, int index, else update_inode_page(inode); +#ifdef CONFIG_F2FS_EMULATED_SD if (S_ISDIR(inode->i_mode) && len == F2FS_XATTR_DIR_NOCASE_LEN && !memcmp(name, F2FS_XATTR_DIR_NOCASE, len)) f2fs_set_nocase_dop(inode); +#endif + exit: kzfree(base_addr); return error; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 623082f7..049471db 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -40,9 +40,11 @@ #define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" +#ifdef CONFIG_F2FS_EMULATED_SD /* user.nocase xattribute for case-insensitive dir support */ #define F2FS_XATTR_DIR_NOCASE "nocase" #define F2FS_XATTR_DIR_NOCASE_LEN 6 +#endif struct f2fs_xattr_header { __le32 h_magic; /* magic number for identification */