diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs new file mode 100644 index 00000000..e066281d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -0,0 +1,88 @@ +What: /sys/fs/f2fs//gc_max_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the maximum sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_min_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the minimum sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_no_gc_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the default sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_idle +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the victim selection policy for garbage collection. + +What: /sys/fs/f2fs//reclaim_segments +Date: October 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the issue rate of segment discard commands. + +What: /sys/fs/f2fs//ipu_policy +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the in-place-update policy. + +What: /sys/fs/f2fs//min_ipu_util +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the FS utilization condition for the in-place-update + policies. + +What: /sys/fs/f2fs//min_fsync_blocks +Date: September 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the dirty page count condition for the in-place-update + policies. + +What: /sys/fs/f2fs//max_small_discards +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the issue rate of small discard commands. + +What: /sys/fs/f2fs//max_victim_search +Date: January 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the number of trials to find a victim segment. + +What: /sys/fs/f2fs//dir_level +Date: March 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the directory level for large directory. 
+ +What: /sys/fs/f2fs//ram_thresh +Date: March 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the memory footprint used by f2fs. + +What: /sys/fs/f2fs//trim_sections +Date: February 2015 +Contact: "Jaegeuk Kim" +Description: + Controls the trimming rate in batch mode. + +What: /sys/fs/f2fs//cp_interval +Date: October 2015 +Contact: "Jaegeuk Kim" +Description: + Controls the checkpoint timing. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index d9086e6d..26e78780 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -106,6 +106,8 @@ background_gc=%s Turn on/off cleaning operations, namely garbage Default value for this option is on. So garbage collection is on by default. disable_roll_forward Disable the roll-forward recovery routine +norecovery Disable the roll-forward recovery routine, mounted read- + only (i.e., -o ro,disable_roll_forward) discard Issue discard/TRIM commands when a segment is cleaned. no_heap Disable heap-style segment allocation which finds free segments for data from the beginning of main area, while @@ -122,6 +124,10 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs inline_xattr Enable the inline xattrs feature. inline_data Enable the inline data feature: New created small(<~3.4k) files can be written into inode block. +inline_dentry Enable the inline dir feature: data in new created + directory entries can be written into inode block. The + space of inode block which is used to store inline + dentries is limited to ~3.4k. flush_merge Merge concurrent cache_flush commands as much as possible to eliminate redundant command issues. If the underlying device handles the cache_flush command relatively slowly, @@ -131,6 +137,15 @@ nobarrier This option can be used if underlying storage guarantees If this option is set, no cache_flush commands are issued but f2fs still guarantees the write ordering of all the data writes. 
+fastboot This option is used when a system wants to reduce mount + time as much as possible, even though normal performance + can be sacrificed. +extent_cache Enable an extent cache based on rb-tree. It can cache + many extents, each of which maps between contiguous logical + addresses and physical addresses per inode, resulting in + an increased cache hit ratio. +noinline_data Disable the inline data feature; the inline data feature is + enabled by default. ================================================================================ DEBUGFS ENTRIES @@ -190,6 +205,10 @@ Files in /sys/fs/f2fs/ checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. + trim_sections This parameter controls the number of sections + to be trimmed out in batch mode when FITRIM + is conducted. 32 sections are set by default. + ipu_policy This parameter controls the policy of in-place updates in f2fs. There are five policies: 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, diff --git a/fs/Kconfig b/fs/Kconfig new file mode 100644 index 00000000..011f4336 --- /dev/null +++ b/fs/Kconfig @@ -0,0 +1,281 @@ +# +# File system configuration +# + +menu "File systems" + +# Use unaligned word dcache accesses +config DCACHE_WORD_ACCESS + bool + +if BLOCK + +source "fs/ext2/Kconfig" +source "fs/ext3/Kconfig" +source "fs/ext4/Kconfig" +source "fs/jbd/Kconfig" +source "fs/jbd2/Kconfig" + +config FS_MBCACHE +# Meta block cache for Extended Attributes (ext2/ext3/ext4) + tristate + default y if EXT2_FS=y && EXT2_FS_XATTR + default y if EXT3_FS=y && EXT3_FS_XATTR + default y if EXT4_FS=y + default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS + +source "fs/reiserfs/Kconfig" +source "fs/jfs/Kconfig" + +source "fs/xfs/Kconfig" +source "fs/gfs2/Kconfig" +source "fs/ocfs2/Kconfig" +source "fs/btrfs/Kconfig" +source "fs/nilfs2/Kconfig" +source "fs/f2fs/Kconfig" + +config FS_DAX + bool "Direct Access (DAX) support" + depends on MMU + depends on !(ARM || MIPS || SPARC) + help + Direct Access 
(DAX) can be used on memory-backed block devices. + If the block device supports DAX and the filesystem supports DAX, + then you can avoid using the pagecache to buffer I/Os. Turning + on this option will compile in support for DAX; you will need to + mount the filesystem using the -o dax option. + + If you do not have a block device that is capable of using this, + or if unsure, say N. Saying Y will increase the size of the kernel + by about 5kB. + +endif # BLOCK + +# Posix ACL utility routines +# +# Note: Posix ACLs can be implemented without these helpers. Never use +# this symbol for ifdefs in core code. +# +config FS_POSIX_ACL + def_bool n + +config EXPORTFS + tristate + +config FILE_LOCKING + bool "Enable POSIX file locking API" if EXPERT + default y + help + This option enables standard file locking support, required + for filesystems like NFS and for the flock() system + call. Disabling this option saves about 11k. + +source "fs/notify/Kconfig" + +source "fs/quota/Kconfig" + +source "fs/autofs4/Kconfig" +source "fs/fuse/Kconfig" +source "fs/overlayfs/Kconfig" + +menu "Caches" + +source "fs/fscache/Kconfig" +source "fs/cachefiles/Kconfig" + +endmenu + +if BLOCK +menu "CD-ROM/DVD Filesystems" + +source "fs/isofs/Kconfig" +source "fs/udf/Kconfig" + +endmenu +endif # BLOCK + +if BLOCK +menu "DOS/FAT/NT Filesystems" + +source "fs/fat/Kconfig" +source "fs/ntfs/Kconfig" + +endmenu +endif # BLOCK + +menu "Pseudo filesystems" + +source "fs/proc/Kconfig" +source "fs/kernfs/Kconfig" +source "fs/sysfs/Kconfig" + +config TMPFS + bool "Tmpfs virtual memory file system support (former shm fs)" + depends on SHMEM + help + Tmpfs is a file system which keeps all files in virtual memory. + + Everything in tmpfs is temporary in the sense that no files will be + created on your hard drive. The files live in memory and swap + space. If you unmount a tmpfs instance, everything stored therein is + lost. + + See Documentation/filesystems/tmpfs.txt for details. 
+ +config TMPFS_POSIX_ACL + bool "Tmpfs POSIX Access Control Lists" + depends on TMPFS + select TMPFS_XATTR + select FS_POSIX_ACL + help + POSIX Access Control Lists (ACLs) support additional access rights + for users and groups beyond the standard owner/group/world scheme, + and this option selects support for ACLs specifically for tmpfs + filesystems. + + If you've selected TMPFS, it's possible that you'll also need + this option as there are a number of Linux distros that require + POSIX ACL support under /dev for certain features to work properly. + For example, some distros need this feature for ALSA-related /dev + files for sound to work properly. In short, if you're not sure, + say Y. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website at http://acl.bestbits.at/. + +config TMPFS_XATTR + bool "Tmpfs extended attributes" + depends on TMPFS + default n + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + http://acl.bestbits.at/ for details). + + Currently this enables support for the trusted.* and + security.* namespaces. + + You need this for POSIX ACL support on tmpfs. + + If unsure, say N. + +config HUGETLBFS + bool "HugeTLB file system support" + depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \ + SYS_SUPPORTS_HUGETLBFS || BROKEN + help + hugetlbfs is a filesystem backing for HugeTLB pages, based on + ramfs. For architectures that support it, say Y here and read + Documentation/vm/hugetlbpage.txt for details. + + If unsure, say N. + +config HUGETLB_PAGE + def_bool HUGETLBFS + +source "fs/configfs/Kconfig" +source "fs/efivarfs/Kconfig" + +endmenu + +menuconfig MISC_FILESYSTEMS + bool "Miscellaneous filesystems" + default y + ---help--- + Say Y here to get to see options for various miscellaneous + filesystems, such as filesystems that came from other + operating systems. + + This option alone does not add any kernel code. 
+ + If you say N, all options in this submenu will be skipped and + disabled; if unsure, say Y here. + +if MISC_FILESYSTEMS + +source "fs/adfs/Kconfig" +source "fs/affs/Kconfig" +source "fs/ecryptfs/Kconfig" +source "fs/hfs/Kconfig" +source "fs/hfsplus/Kconfig" +source "fs/befs/Kconfig" +source "fs/bfs/Kconfig" +source "fs/efs/Kconfig" +source "fs/jffs2/Kconfig" +# UBIFS File system configuration +source "fs/ubifs/Kconfig" +source "fs/logfs/Kconfig" +source "fs/cramfs/Kconfig" +source "fs/squashfs/Kconfig" +source "fs/freevxfs/Kconfig" +source "fs/minix/Kconfig" +source "fs/omfs/Kconfig" +source "fs/hpfs/Kconfig" +source "fs/qnx4/Kconfig" +source "fs/qnx6/Kconfig" +source "fs/romfs/Kconfig" +source "fs/pstore/Kconfig" +source "fs/sysv/Kconfig" +source "fs/ufs/Kconfig" +source "fs/exofs/Kconfig" + +endif # MISC_FILESYSTEMS + +source "fs/exofs/Kconfig.ore" + +menuconfig NETWORK_FILESYSTEMS + bool "Network File Systems" + default y + depends on NET + ---help--- + Say Y here to get to see options for network filesystems and + filesystem-related networking code, such as NFS daemon and + RPCSEC security modules. + + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled; if unsure, say Y here. 
+ +if NETWORK_FILESYSTEMS + +source "fs/nfs/Kconfig" +source "fs/nfsd/Kconfig" + +config GRACE_PERIOD + tristate + +config LOCKD + tristate + depends on FILE_LOCKING + select GRACE_PERIOD + +config LOCKD_V4 + bool + depends on NFSD_V3 || NFS_V3 + depends on FILE_LOCKING + default y + +config NFS_ACL_SUPPORT + tristate + select FS_POSIX_ACL + +config NFS_COMMON + bool + depends on NFSD || NFS_FS || LOCKD + default y + +source "net/sunrpc/Kconfig" +source "fs/ceph/Kconfig" +source "fs/cifs/Kconfig" +source "fs/ncpfs/Kconfig" +source "fs/coda/Kconfig" +source "fs/afs/Kconfig" +source "fs/9p/Kconfig" + +endif # NETWORK_FILESYSTEMS + +source "fs/nls/Kconfig" +source "fs/dlm/Kconfig" + +endmenu diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 67443fe7..61765d23 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -1,5 +1,5 @@ config F2FS_FS - tristate "F2FS filesystem support (EXPERIMENTAL)" + tristate "F2FS filesystem support" depends on BLOCK help F2FS is based on Log-structured File System (LFS), which supports @@ -71,3 +71,41 @@ config F2FS_CHECK_FS Enables BUG_ONs which check the filesystem consistency in runtime. If you want to improve the performance, say N. + +config F2FS_FS_ENCRYPTION + bool "F2FS Encryption" + depends on F2FS_FS + depends on F2FS_FS_XATTR + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_ECB + select CRYPTO_XTS + select CRYPTO_CTS + select CRYPTO_CTR + select CRYPTO_SHA256 + select KEYS + select ENCRYPTED_KEYS + help + Enable encryption of f2fs files and directories. This + feature is similar to ecryptfs, but it is more memory + efficient since it avoids caching the encrypted and + decrypted pages in the page cache. + +config F2FS_IO_TRACE + bool "F2FS IO tracer" + depends on F2FS_FS + depends on FUNCTION_TRACER + help + F2FS IO trace is based on a function trace, which gathers process + information and block IO patterns in the filesystem level. + + If unsure, say N. 
+ +config F2FS_EMULATED_SD + bool "F2FS emulated SD" + depends on F2FS_FS + depends on F2FS_FS_XATTR + help + Enable emulated SD card on f2fs. + + If unsure, say N. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 2e35da12..396be1a3 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -5,3 +5,6 @@ f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o +f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o +f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o \ + crypto_key.o crypto_fname.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 83b9b5a8..5b952c05 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); @@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) int i; f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * - sizeof(struct f2fs_acl_entry), GFP_KERNEL); + sizeof(struct f2fs_acl_entry), GFP_NOFS); if (!f2fs_acl) return ERR_PTR(-ENOMEM); @@ -162,22 +162,32 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) return ERR_PTR(-EINVAL); } -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, + struct page *dpage) { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; void *value = NULL; struct posix_acl *acl; int retval; + if (!test_opt(sbi, POSIX_ACL)) + return NULL; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + retval 
= f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); if (retval > 0) { value = kmalloc(retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); - retval = f2fs_getxattr(inode, name_index, "", value, retval); + retval = f2fs_getxattr(inode, name_index, "", value, + retval, dpage); } if (retval > 0) @@ -194,15 +204,26 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } -static int __f2fs_set_acl(struct inode *inode, int type, +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + return __f2fs_get_acl(inode, type, NULL); +} + +static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); int name_index; void *value = NULL; size_t size = 0; int error; + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; @@ -229,7 +250,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -240,35 +261,159 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return error; } -int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, + struct page *dpage) { - return __f2fs_set_acl(inode, type, acl, NULL); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct posix_acl *acl = NULL; + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(sbi, POSIX_ACL)) { + acl = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if 
(!acl) + inode->i_mode &= ~current_umask(); + } + + if (!test_opt(sbi, POSIX_ACL) || !acl) + goto cleanup; + + if (S_ISDIR(inode->i_mode)) { + error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl, ipage); + if (error) + goto cleanup; + } + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (error < 0) + return error; + if (error > 0) + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); +cleanup: + posix_acl_release(acl); + return error; } -int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) +int f2fs_acl_chmod(struct inode *inode) { - struct posix_acl *default_acl, *acl; - int error = 0; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct posix_acl *acl; + int error; + umode_t mode = get_inode_mode(inode); + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(mode)) + return -EOPNOTSUPP; - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); + acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + error = posix_acl_chmod(&acl, GFP_KERNEL, mode); if (error) return error; - if (default_acl) { - error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, - ipage); - posix_acl_release(default_acl); - } - if (acl) { - if (error) - error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, - ipage); - posix_acl_release(acl); + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, NULL); + posix_acl_release(acl); + return error; +} + +static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + const char *xname = POSIX_ACL_XATTR_DEFAULT; + size_t size; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + + if (type == ACL_TYPE_ACCESS) + xname = POSIX_ACL_XATTR_ACCESS; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_get_acl(struct dentry *dentry, const char 
*name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct posix_acl *acl; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else { + acl = NULL; } + error = f2fs_set_acl(inode, type, acl, NULL); + +release_and_out: + posix_acl_release(acl); return error; } + +const struct xattr_handler f2fs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; + +const struct xattr_handler f2fs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index e0864651..b4ba6866 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -37,15 +37,21 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL extern struct posix_acl *f2fs_get_acl(struct inode *, int); -extern int f2fs_set_acl(struct inode *inode, struct 
posix_acl *acl, int type); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); +extern int f2fs_acl_chmod(struct inode *); +extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, + struct page *); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL #define f2fs_set_acl NULL +static inline int f2fs_acl_chmod(struct inode *inode) +{ + return 0; +} + static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *page) + struct page *ipage, struct page *dpage) { return 0; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd10a031..958890f6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -20,10 +20,11 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include static struct kmem_cache *ino_entry_slab; -static struct kmem_cache *inode_entry_slab; +struct kmem_cache *inode_entry_slab; /* * We guarantee no failure on the returned page. @@ -50,6 +51,13 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); struct page *page; + struct f2fs_io_info fio = { + .sbi = sbi, + .type = META, + .rw = READ_SYNC | REQ_META | REQ_PRIO, + .blk_addr = index, + .encrypted_page = NULL, + }; repeat: page = grab_cache_page(mapping, index); if (!page) { @@ -59,8 +67,9 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) if (PageUptodate(page)) goto out; - if (f2fs_submit_page_bio(sbi, page, index, - READ_SYNC | REQ_META | REQ_PRIO)) + fio.page = page; + + if (f2fs_submit_page_bio(&fio)) goto repeat; lock_page(page); @@ -69,39 +78,39 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) goto repeat; } out: + mark_page_accessed(page); return page; } -struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) -{ - bool readahead = false; - struct page *page; - - page = find_get_page(META_MAPPING(sbi), index); - if (!page || (page && 
!PageUptodate(page))) - readahead = true; - f2fs_put_page(page, 0); - - if (readahead) - ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); - return get_meta_page(sbi, index); -} - -static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { switch (type) { case META_NAT: - return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK; + break; case META_SIT: - return SIT_BLK_CNT(sbi); + if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) + return false; + break; case META_SSA: + if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || + blkaddr < SM_I(sbi)->ssa_blkaddr)) + return false; + break; case META_CP: - return 0; + if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || + blkaddr < __start_cp_addr(sbi))) + return false; + break; case META_POR: - return MAX_BLKADDR(sbi); + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } + + return true; } /* @@ -112,48 +121,45 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - block_t max_blks = get_max_meta_blks(sbi, type); - struct f2fs_io_info fio = { + .sbi = sbi, .type = META, - .rw = READ_SYNC | REQ_META | REQ_PRIO + .rw = READ_SYNC | REQ_META | REQ_PRIO, + .encrypted_page = NULL, }; for (; nrpages-- > 0; blkno++) { - block_t blk_addr; + + if (!is_valid_blkaddr(sbi, blkno, type)) + goto out; switch (type) { case META_NAT: - /* get nat block addr */ - if (unlikely(blkno >= max_blks)) + if (unlikely(blkno >= + NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; - blk_addr = current_nat_addr(sbi, + /* get nat block addr */ + fio.blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - if (unlikely(blkno >= max_blks)) - goto out; - blk_addr = current_sit_addr(sbi, + fio.blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); - if 
(blkno != start && prev_blk_addr + 1 != blk_addr) + if (blkno != start && prev_blk_addr + 1 != fio.blk_addr) goto out; - prev_blk_addr = blk_addr; + prev_blk_addr = fio.blk_addr; break; case META_SSA: case META_CP: case META_POR: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < SEG0_BLKADDR(sbi))) - goto out; - blk_addr = blkno; + fio.blk_addr = blkno; break; default: BUG(); } - page = grab_cache_page(META_MAPPING(sbi), blk_addr); + page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr); if (!page) continue; if (PageUptodate(page)) { @@ -161,7 +167,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type continue; } - f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); + fio.page = page; + f2fs_submit_page_mbio(&fio); f2fs_put_page(page, 0); } out: @@ -169,6 +176,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type return blkno - start; } +void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + bool readahead = false; + + page = find_get_page(META_MAPPING(sbi), index); + if (!page || (page && !PageUptodate(page))) + readahead = true; + f2fs_put_page(page, 0); + + if (readahead) + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); +} + static int f2fs_write_meta_page(struct page *page, struct writeback_control *wbc) { @@ -176,9 +197,9 @@ static int f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (wbc->for_reclaim) + if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; @@ -187,6 +208,9 @@ static int f2fs_write_meta_page(struct page *page, write_meta_page(sbi, page); dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, META, WRITE); return 0; redirty_out: @@ -259,7 +283,7 @@ 
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, if (!clear_page_dirty_for_io(page)) goto continue_unlock; - if (f2fs_write_meta_page(page, &wbc)) { + if (mapping->a_ops->writepage(page, &wbc)) { unlock_page(page); break; } @@ -285,6 +309,8 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); + SetPagePrivate(page); + f2fs_trace_pid(page); return 1; } return 0; @@ -294,50 +320,63 @@ const struct address_space_operations f2fs_meta_aops = { .writepage = f2fs_write_meta_page, .writepages = f2fs_write_meta_pages, .set_page_dirty = f2fs_set_meta_page_dirty, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, }; static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; retry: - spin_lock(&sbi->ino_lock[type]); + if (radix_tree_preload(GFP_NOFS)) { + cond_resched(); + goto retry; + } + + spin_lock(&im->ino_lock); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); if (!e) { - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); + radix_tree_preload_end(); goto retry; } - if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { - spin_unlock(&sbi->ino_lock[type]); + if (radix_tree_insert(&im->ino_root, ino, e)) { + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); + radix_tree_preload_end(); goto retry; } memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; - list_add_tail(&e->list, &sbi->ino_list[type]); + list_add_tail(&e->list, &im->ino_list); + if (type != ORPHAN_INO) + im->ino_num++; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); + radix_tree_preload_end(); } static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = 
&sbi->im[type]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[type]); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); if (e) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[type], ino); - if (type == ORPHAN_INO) - sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[type]); + radix_tree_delete(&im->ino_root, ino); + im->ino_num--; + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); return; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); } void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -355,10 +394,12 @@ void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) /* mode should be APPEND_INO or UPDATE_INO */ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { + struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[mode]); - e = radix_tree_lookup(&sbi->ino_root[mode], ino); - spin_unlock(&sbi->ino_lock[mode]); + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + spin_unlock(&im->ino_lock); return e ? 
true : false; } @@ -368,36 +409,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi) int i; for (i = APPEND_INO; i <= UPDATE_INO; i++) { - spin_lock(&sbi->ino_lock[i]); - list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { + struct inode_management *im = &sbi->im[i]; + + spin_lock(&im->ino_lock); + list_for_each_entry_safe(e, tmp, &im->ino_list, list) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[i], e->ino); + radix_tree_delete(&im->ino_root, e->ino); kmem_cache_free(ino_entry_slab, e); + im->ino_num--; } - spin_unlock(&sbi->ino_lock[i]); + spin_unlock(&im->ino_lock); } } int acquire_orphan_inode(struct f2fs_sb_info *sbi) { + struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - if (unlikely(sbi->n_orphans >= sbi->max_orphans)) + spin_lock(&im->ino_lock); + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else - sbi->n_orphans++; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + im->ino_num++; + spin_unlock(&im->ino_lock); return err; } void release_orphan_inode(struct f2fs_sb_info *sbi) { - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi, sbi->n_orphans == 0); - sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + spin_lock(&im->ino_lock); + f2fs_bug_on(sbi, im->ino_num == 0); + im->ino_num--; + spin_unlock(&im->ino_lock); } void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -424,20 +471,19 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) void recover_orphan_inodes(struct f2fs_sb_info *sbi) { - block_t start_blk, orphan_blkaddr, i, j; + block_t start_blk, orphan_blocks, i, j; if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return; - sbi->por_doing = true; + set_sbi_flag(sbi, SBI_POR_DOING); - start_blk = __start_cp_addr(sbi) + 1 + - le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); - orphan_blkaddr = __start_sum_addr(sbi) - 1; + start_blk = 
__start_cp_addr(sbi) + 1 + __cp_payload(sbi); + orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); - ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); + ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP); - for (i = 0; i < orphan_blkaddr; i++) { + for (i = 0; i < orphan_blocks; i++) { struct page *page = get_meta_page(sbi, start_blk + i); struct f2fs_orphan_block *orphan_blk; @@ -450,7 +496,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) } /* clear Orphan Flag */ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); return; } @@ -460,17 +506,24 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = - (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); + unsigned short orphan_blocks; struct page *page = NULL; struct ino_entry *orphan = NULL; + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); for (index = 0; index < orphan_blocks; index++) grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - head = &sbi->ino_list[ORPHAN_INO]; + + /* + * we don't need to do spin_lock(&im->ino_lock) here, since all the + * orphan inode operations are covered under f2fs_lock_op(). + * And, spin_lock should be avoided due to page operations below. 
+ */ + head = &im->ino_list; /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { @@ -509,8 +562,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) set_page_dirty(page); f2fs_put_page(page, 1); } - - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -532,7 +583,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp1; - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; @@ -547,7 +598,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp2; - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; @@ -573,7 +624,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; - unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + unsigned int cp_blks = 1 + __cp_payload(sbi); block_t cp_blk_no; int i; @@ -634,7 +685,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) return -EINVAL; } -static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) +static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -651,7 +702,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new; + struct inode_entry 
*new; int ret = 0; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) @@ -675,12 +726,13 @@ void update_dirty_page(struct inode *inode, struct page *page) kmem_cache_free(inode_entry_slab, new); out: SetPagePrivate(page); + f2fs_trace_pid(page); } void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new = + struct inode_entry *new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); int ret = 0; @@ -698,7 +750,7 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *entry; + struct inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; @@ -728,9 +780,12 @@ void remove_dirty_dir_inode(struct inode *inode) void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; - struct dir_inode_entry *entry; + struct inode_entry *entry; struct inode *inode; retry: + if (unlikely(f2fs_cp_error(sbi))) + return; + spin_lock(&sbi->dir_inode_lock); head = &sbi->dir_inode_list; @@ -738,7 +793,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) spin_unlock(&sbi->dir_inode_lock); return; } - entry = list_entry(head->next, struct dir_inode_entry, list); + entry = list_entry(head->next, struct inode_entry, list); inode = igrab(entry->inode); spin_unlock(&sbi->dir_inode_lock); if (inode) { @@ -750,6 +805,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) * wribacking dentry pages in the freeing inode. 
*/ f2fs_submit_merged_bio(sbi, DATA, WRITE); + cond_resched(); } goto retry; } @@ -830,14 +886,13 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; - struct page *cp_page; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; - void *kaddr; int i; - int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + int cp_payload_blks = __cp_payload(sbi); /* * This avoids to conduct wrong roll-forward operations and uses @@ -883,34 +938,41 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->next_free_nid = cpu_to_le32(last_nid); /* 2 cp + n data seg summary + orphan inode blocks */ - data_sum_blocks = npages_for_summary_flush(sbi); + data_sum_blocks = npages_for_summary_flush(sbi, false); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); + orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); - if (cpc->reason == CP_UMOUNT) { - set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + if (__remain_node_summaries(cpc->reason)) ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); - } else { - clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks); - } - if (sbi->n_orphans) + if (cpc->reason == CP_UMOUNT) + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else + clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + + if (cpc->reason == CP_FASTBOOT) + set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); 
+ else + clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + + if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (sbi->need_fsck) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) set_ckpt_flags(ckpt, CP_FSCK_FLAG); /* update SIT/NAT bitmap */ @@ -925,39 +987,26 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk = __start_cp_addr(sbi); /* write out checkpoint buffer at block 0 */ - cp_page = grab_meta_page(sbi, start_blk++); - kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); - - for (i = 1; i < 1 + cp_payload_blks; i++) { - cp_page = grab_meta_page(sbi, start_blk++); - kaddr = page_address(cp_page); - memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, - (1 << sbi->log_blocksize)); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); - } + update_meta_page(sbi, ckpt, start_blk++); + + for (i = 1; i < 1 + cp_payload_blks; i++) + update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE, + start_blk++); - if (sbi->n_orphans) { + if (orphan_num) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; } write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; - if (cpc->reason == CP_UMOUNT) { + if (__remain_node_summaries(cpc->reason)) { write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; } /* writeout checkpoint block */ - cp_page = grab_meta_page(sbi, start_blk); - kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); + update_meta_page(sbi, ckpt, start_blk); /* wait for previous submitted node/meta pages writeback */ wait_on_all_pages_writeback(sbi); @@ -975,13 +1024,16 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + /* wait for previous submitted 
meta pages writeback */ + wait_on_all_pages_writeback(sbi); + release_dirty_inode(sbi); if (unlikely(f2fs_cp_error(sbi))) return; - clear_prefree_segments(sbi); - F2FS_RESET_SB_DIRT(sbi); + clear_prefree_segments(sbi, cpc); + clear_sbi_flag(sbi, SBI_IS_DIRTY); } /* @@ -992,14 +1044,19 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); - mutex_lock(&sbi->cp_mutex); - if (!sbi->s_dirty && cpc->reason != CP_DISCARD) + if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && + (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || + (cpc->reason == CP_DISCARD && !sbi->discard_blks))) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; + if (f2fs_readonly(sbi->sb)) + goto out; + + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); + if (block_operations(sbi)) goto out; @@ -1026,6 +1083,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); + + if (cpc->reason == CP_RECOVERY) + f2fs_msg(sbi->sb, KERN_NOTICE, + "checkpoint: version = %llx", ckpt_ver); + + /* do checkpoint periodically */ + sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval); out: mutex_unlock(&sbi->cp_mutex); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); @@ -1036,20 +1100,17 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) int i; for (i = 0; i < MAX_INO_ENTRY; i++) { - INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); - spin_lock_init(&sbi->ino_lock[i]); - INIT_LIST_HEAD(&sbi->ino_list[i]); + struct inode_management *im = &sbi->im[i]; + + INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC); + spin_lock_init(&im->ino_lock); + INIT_LIST_HEAD(&im->ino_list); + im->ino_num = 0; } - /* - * considering 512 blocks in a segment 8 blocks are needed for cp - * and log segment summaries. 
Remaining blocks are used to keep - * orphan entries with the limitation one reserved segment - * for cp pack we can have max 1020*504 orphan entries - */ - sbi->n_orphans = 0; sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; + NR_CURSEG_TYPE - __cp_payload(sbi)) * + F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) @@ -1058,8 +1119,8 @@ int __init create_checkpoint_caches(void) sizeof(struct ino_entry)); if (!ino_entry_slab) return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", - sizeof(struct dir_inode_entry)); + inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", + sizeof(struct inode_entry)); if (!inode_entry_slab) { kmem_cache_destroy(ino_entry_slab); return -ENOMEM; diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c new file mode 100644 index 00000000..4a62ef14 --- /dev/null +++ b/fs/f2fs/crypto.c @@ -0,0 +1,491 @@ +/* + * linux/fs/f2fs/crypto.c + * + * Copied from linux/fs/ext4/crypto.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * This contains encryption functions for f2fs + * + * Written by Michael Halcrow, 2014. + * + * Filename encryption additions + * Uday Savagaonkar, 2014 + * Encryption policy handling additions + * Ildar Muslukhov, 2014 + * Remove ext4_encrypted_zeroout(), + * add f2fs_restore_and_release_control_page() + * Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + * + * The usage of AES-XTS should conform to recommendations in NIST + * Special Publication 800-38E and IEEE P1619/D16. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +/* Encryption added and removed here! 
(L: */ + +static unsigned int num_prealloc_crypto_pages = 32; +static unsigned int num_prealloc_crypto_ctxs = 128; + +module_param(num_prealloc_crypto_pages, uint, 0444); +MODULE_PARM_DESC(num_prealloc_crypto_pages, + "Number of crypto pages to preallocate"); +module_param(num_prealloc_crypto_ctxs, uint, 0444); +MODULE_PARM_DESC(num_prealloc_crypto_ctxs, + "Number of crypto contexts to preallocate"); + +static mempool_t *f2fs_bounce_page_pool; + +static LIST_HEAD(f2fs_free_crypto_ctxs); +static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock); + +static struct workqueue_struct *f2fs_read_workqueue; +static DEFINE_MUTEX(crypto_init); + +static struct kmem_cache *f2fs_crypto_ctx_cachep; +struct kmem_cache *f2fs_crypt_info_cachep; + +/** + * f2fs_release_crypto_ctx() - Releases an encryption context + * @ctx: The encryption context to release. + * + * If the encryption context was allocated from the pre-allocated pool, returns + * it to that pool. Else, frees it. + * + * If there's a bounce page in the context, this frees that. + */ +void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) +{ + unsigned long flags; + + if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) { + mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool); + ctx->w.bounce_page = NULL; + } + ctx->w.control_page = NULL; + if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { + kmem_cache_free(f2fs_crypto_ctx_cachep, ctx); + } else { + spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); + list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); + spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); + } +} + +/** + * f2fs_get_crypto_ctx() - Gets an encryption context + * @inode: The inode for which we are doing the crypto + * + * Allocates and initializes an encryption context. + * + * Return: An allocated and initialized encryption context on success; an + * ERR_PTR()-encoded error value otherwise. 
+ */ +struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) +{ + struct f2fs_crypto_ctx *ctx = NULL; + unsigned long flags; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (ci == NULL) + return ERR_PTR(-ENOKEY); + + /* + * We first try getting the ctx from a free list because in + * the common case the ctx will have an allocated and + * initialized crypto tfm, so it's probably a worthwhile + * optimization. For the bounce page, we first try getting it + * from the kernel allocator because that's just about as fast + * as getting it from a list and because a cache of free pages + * should generally be a "last resort" option for a filesystem + * to be able to do its job. + */ + spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); + ctx = list_first_entry_or_null(&f2fs_free_crypto_ctxs, + struct f2fs_crypto_ctx, free_list); + if (ctx) + list_del(&ctx->free_list); + spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); + if (!ctx) { + ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS); + if (!ctx) + return ERR_PTR(-ENOMEM); + ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; + } else { + ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; + } + ctx->flags &= ~F2FS_WRITE_PATH_FL; + return ctx; +} + +/* + * Call f2fs_decrypt on every single page, reusing the encryption + * context. 
+ */ +static void completion_pages(struct work_struct *work) +{ + struct f2fs_crypto_ctx *ctx = + container_of(work, struct f2fs_crypto_ctx, r.work); + struct bio *bio = ctx->r.bio; + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + int ret = f2fs_decrypt(ctx, page); + + if (ret) { + WARN_ON_ONCE(1); + SetPageError(page); + } else + SetPageUptodate(page); + unlock_page(page); + } + f2fs_release_crypto_ctx(ctx); + bio_put(bio); +} + +void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio) +{ + INIT_WORK(&ctx->r.work, completion_pages); + ctx->r.bio = bio; + queue_work(f2fs_read_workqueue, &ctx->r.work); +} + +static void f2fs_crypto_destroy(void) +{ + struct f2fs_crypto_ctx *pos, *n; + + list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) + kmem_cache_free(f2fs_crypto_ctx_cachep, pos); + INIT_LIST_HEAD(&f2fs_free_crypto_ctxs); + if (f2fs_bounce_page_pool) + mempool_destroy(f2fs_bounce_page_pool); + f2fs_bounce_page_pool = NULL; +} + +/** + * f2fs_crypto_initialize() - Set up for f2fs encryption. + * + * We only call this when we start accessing encrypted files, since it + * results in memory getting allocated that wouldn't otherwise be used. + * + * Return: Zero on success, non-zero otherwise. 
+ */ +int f2fs_crypto_initialize(void) +{ + int i, res = -ENOMEM; + + if (f2fs_bounce_page_pool) + return 0; + + mutex_lock(&crypto_init); + if (f2fs_bounce_page_pool) + goto already_initialized; + + for (i = 0; i < num_prealloc_crypto_ctxs; i++) { + struct f2fs_crypto_ctx *ctx; + + ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL); + if (!ctx) + goto fail; + list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); + } + + /* must be set last: the unlocked check above uses it as the init flag */ + f2fs_bounce_page_pool = + mempool_create_page_pool(num_prealloc_crypto_pages, 0); + if (!f2fs_bounce_page_pool) + goto fail; + +already_initialized: + mutex_unlock(&crypto_init); + return 0; +fail: + f2fs_crypto_destroy(); + mutex_unlock(&crypto_init); + return res; +} + +/** + * f2fs_exit_crypto() - Shutdown the f2fs encryption system + */ +void f2fs_exit_crypto(void) +{ + f2fs_crypto_destroy(); + + if (f2fs_read_workqueue) + destroy_workqueue(f2fs_read_workqueue); + if (f2fs_crypto_ctx_cachep) + kmem_cache_destroy(f2fs_crypto_ctx_cachep); + if (f2fs_crypt_info_cachep) + kmem_cache_destroy(f2fs_crypt_info_cachep); +} + +int __init f2fs_init_crypto(void) +{ + int res = -ENOMEM; + + f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); + if (!f2fs_read_workqueue) + goto fail; + + f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypto_ctx_cachep) + goto fail; + + f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info, + SLAB_RECLAIM_ACCOUNT); + if (!f2fs_crypt_info_cachep) + goto fail; + + return 0; +fail: + f2fs_exit_crypto(); + return res; +} + +void f2fs_restore_and_release_control_page(struct page **page) +{ + struct f2fs_crypto_ctx *ctx; + struct page *bounce_page; + + /* A page that has a mapping is not a bounce page: nothing to restore. */ + if ((*page)->mapping) + return; + + /* The bounce data page is unmapped. 
*/ + bounce_page = *page; + ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page); + + /* restore control page */ + *page = ctx->w.control_page; + + f2fs_restore_control_page(bounce_page); +} + +void f2fs_restore_control_page(struct page *data_page) +{ + struct f2fs_crypto_ctx *ctx = + (struct f2fs_crypto_ctx *)page_private(data_page); + + set_page_private(data_page, (unsigned long)NULL); + ClearPagePrivate(data_page); + unlock_page(data_page); + f2fs_release_crypto_ctx(ctx); +} + +/** + * f2fs_crypt_complete() - The completion callback for page encryption + * @req: The asynchronous encryption request context + * @res: The result of the encryption operation + */ +static void f2fs_crypt_complete(struct crypto_async_request *req, int res) +{ + struct f2fs_completion_result *ecr = req->data; + + if (res == -EINPROGRESS) + return; + ecr->res = res; + complete(&ecr->completion); +} + +typedef enum { + F2FS_DECRYPT = 0, + F2FS_ENCRYPT, +} f2fs_direction_t; + +static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, + struct inode *inode, + f2fs_direction_t rw, + pgoff_t index, + struct page *src_page, + struct page *dest_page) +{ + u8 xts_tweak[F2FS_XTS_TWEAK_SIZE]; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist dst, src; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", + __func__); + return -ENOMEM; + } + ablkcipher_request_set_callback( + req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_crypt_complete, &ecr); + + BUILD_BUG_ON(F2FS_XTS_TWEAK_SIZE < sizeof(index)); + memcpy(xts_tweak, &index, sizeof(index)); + memset(&xts_tweak[sizeof(index)], 0, + F2FS_XTS_TWEAK_SIZE - sizeof(index)); + + sg_init_table(&dst, 1); + sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); + sg_init_table(&src, 1); + 
+ sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); + ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, + xts_tweak); + if (rw == F2FS_DECRYPT) + res = crypto_ablkcipher_decrypt(req); + else + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + ablkcipher_request_free(req); + if (res) { + printk_ratelimited(KERN_ERR + "%s: crypto_ablkcipher_encrypt() returned %d\n", + __func__, res); + return res; + } + return 0; +} + +static struct page *alloc_bounce_page(struct f2fs_crypto_ctx *ctx) +{ + ctx->w.bounce_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT); + if (ctx->w.bounce_page == NULL) + return ERR_PTR(-ENOMEM); + ctx->flags |= F2FS_WRITE_PATH_FL; + return ctx->w.bounce_page; +} + +/** + * f2fs_encrypt() - Encrypts a page + * @inode: The inode for which the encryption should take place + * @plaintext_page: The page to encrypt. Must be locked. + * + * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx + * encryption context. + * + * Called on the page write path. The caller must call + * f2fs_restore_control_page() on the returned ciphertext page to + * release the bounce buffer and the encryption context. + * + * Return: An allocated page with the encrypted content on success. Else, an + * ERR_PTR()-encoded error value. + */ +struct page *f2fs_encrypt(struct inode *inode, + struct page *plaintext_page) +{ + struct f2fs_crypto_ctx *ctx; + struct page *ciphertext_page = NULL; + int err; + + BUG_ON(!PageLocked(plaintext_page)); + + ctx = f2fs_get_crypto_ctx(inode); + if (IS_ERR(ctx)) + return (struct page *)ctx; + + /* The encryption operation will require a bounce page. 
*/ + ciphertext_page = alloc_bounce_page(ctx); + if (IS_ERR(ciphertext_page)) + goto err_out; + + ctx->w.control_page = plaintext_page; + err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index, + plaintext_page, ciphertext_page); + if (err) { + ciphertext_page = ERR_PTR(err); + goto err_out; + } + + SetPagePrivate(ciphertext_page); + set_page_private(ciphertext_page, (unsigned long)ctx); + lock_page(ciphertext_page); + return ciphertext_page; + +err_out: + f2fs_release_crypto_ctx(ctx); + return ciphertext_page; +} + +/** + * f2fs_decrypt() - Decrypts a page in-place + * @ctx: The encryption context. + * @page: The page to decrypt. Must be locked. + * + * Decrypts page in-place using the ctx encryption context. + * + * Called from the read completion callback. + * + * Return: Zero on success, non-zero otherwise. + */ +int f2fs_decrypt(struct f2fs_crypto_ctx *ctx, struct page *page) +{ + BUG_ON(!PageLocked(page)); + + return f2fs_page_crypto(ctx, page->mapping->host, + F2FS_DECRYPT, page->index, page, page); +} + +/* + * Convenience function which takes care of allocating and + * deallocating the encryption context + */ +int f2fs_decrypt_one(struct inode *inode, struct page *page) +{ + struct f2fs_crypto_ctx *ctx = f2fs_get_crypto_ctx(inode); + int ret; + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + ret = f2fs_decrypt(ctx, page); + f2fs_release_crypto_ctx(ctx); + return ret; +} + +bool f2fs_valid_contents_enc_mode(uint32_t mode) +{ + return (mode == F2FS_ENCRYPTION_MODE_AES_256_XTS); +} + +/** + * f2fs_validate_encryption_key_size() - Validate the encryption key size + * @mode: The key mode. + * @size: The key size to validate. + * + * Return: The validated key size for @mode. Zero if invalid. 
+ */ +uint32_t f2fs_validate_encryption_key_size(uint32_t mode, uint32_t size) +{ + if (size == f2fs_encryption_key_size(mode)) + return size; + return 0; +} diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c new file mode 100644 index 00000000..ab377d49 --- /dev/null +++ b/fs/f2fs/crypto_fname.c @@ -0,0 +1,440 @@ +/* + * linux/fs/f2fs/crypto_fname.c + * + * Copied from linux/fs/ext4/crypto.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * This contains functions for filename crypto management in f2fs + * + * Written by Uday Savagaonkar, 2014. + * + * Adjust f2fs dentry structure + * Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "f2fs_crypto.h" +#include "xattr.h" + +/** + * f2fs_dir_crypt_complete() - Completion callback for filename crypto + */ +static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res) +{ + struct f2fs_completion_result *ecr = req->data; + + if (res == -EINPROGRESS) + return; + ecr->res = res; + complete(&ecr->completion); +} + +bool f2fs_valid_filenames_enc_mode(uint32_t mode) +{ + return (mode == F2FS_ENCRYPTION_MODE_AES_256_CTS); +} + +static unsigned max_name_len(struct inode *inode) +{ + return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize : + F2FS_NAME_LEN; +} + +/** + * f2fs_fname_encrypt() - + * + * This function encrypts the input filename into oname and sets oname->len to + * the ciphertext length. It returns the cipher status; errors are negative + * numbers. We trust the caller to allocate sufficient memory to oname string. 
+ */ +static int f2fs_fname_encrypt(struct inode *inode, + const struct qstr *iname, struct f2fs_str *oname) +{ + u32 ciphertext_len; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + char iv[F2FS_CRYPTO_BLOCK_SIZE]; + struct scatterlist src_sg, dst_sg; + int padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK); + char *workbuf, buf[32], *alloc_buf = NULL; + unsigned lim = max_name_len(inode); + + if (iname->len <= 0 || iname->len > lim) + return -EIO; + + ciphertext_len = (iname->len < F2FS_CRYPTO_BLOCK_SIZE) ? + F2FS_CRYPTO_BLOCK_SIZE : iname->len; + ciphertext_len = f2fs_fname_crypto_round_up(ciphertext_len, padding); + ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len; + + if (ciphertext_len <= sizeof(buf)) { + workbuf = buf; + } else { + alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); + if (!alloc_buf) + return -ENOMEM; + workbuf = alloc_buf; + } + + /* Allocate request */ + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", __func__); + kfree(alloc_buf); + return -ENOMEM; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_dir_crypt_complete, &ecr); + + /* Copy the input */ + memcpy(workbuf, iname->name, iname->len); + if (iname->len < ciphertext_len) + memset(workbuf + iname->len, 0, ciphertext_len - iname->len); + + /* Initialize IV */ + memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE); + + /* Create encryption request */ + sg_init_one(&src_sg, workbuf, ciphertext_len); + sg_init_one(&dst_sg, oname->name, ciphertext_len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = 
ecr.res; + } + kfree(alloc_buf); + ablkcipher_request_free(req); + if (res < 0) { + printk_ratelimited(KERN_ERR + "%s: Error (error code %d)\n", __func__, res); + } + oname->len = ciphertext_len; + return res; +} + +/* + * f2fs_fname_decrypt() + * This function decrypts the input filename, and returns + * the length of the plaintext (up to the first NUL byte). + * Errors are returned as negative numbers. + * We trust the caller to allocate sufficient memory to oname string. + */ +static int f2fs_fname_decrypt(struct inode *inode, + const struct f2fs_str *iname, struct f2fs_str *oname) +{ + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist src_sg, dst_sg; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + char iv[F2FS_CRYPTO_BLOCK_SIZE]; + unsigned lim = max_name_len(inode); + + if (iname->len <= 0 || iname->len > lim) + return -EIO; + + /* Allocate request */ + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", __func__); + return -ENOMEM; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + f2fs_dir_crypt_complete, &ecr); + + /* Initialize IV */ + memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE); + + /* Create decryption request */ + sg_init_one(&src_sg, iname->name, iname->len); + sg_init_one(&dst_sg, oname->name, oname->len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); + res = crypto_ablkcipher_decrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + ablkcipher_request_free(req); + if (res < 0) { + printk_ratelimited(KERN_ERR + "%s: Error in f2fs_fname_decrypt (error code %d)\n", + __func__, res); + return res; + } + + oname->len = strnlen(oname->name, iname->len); + return oname->len; +} + +static const char *lookup_table = + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + +/** + * digest_encode() - + * + * Encodes the input digest using characters from the set [A-Za-z0-9+,]. + * The encoded string is roughly 4/3 times the size of the input string. + */ +static int digest_encode(const char *src, int len, char *dst) +{ + int i = 0, bits = 0, ac = 0; + char *cp = dst; + + while (i < len) { + ac += (((unsigned char) src[i]) << bits); + bits += 8; + do { + *cp++ = lookup_table[ac & 0x3f]; + ac >>= 6; + bits -= 6; + } while (bits >= 6); + i++; + } + if (bits) + *cp++ = lookup_table[ac & 0x3f]; + return cp - dst; +} + +static int digest_decode(const char *src, int len, char *dst) +{ + int i = 0, bits = 0, ac = 0; + const char *p; + char *cp = dst; + + while (i < len) { + p = strchr(lookup_table, src[i]); + if (p == NULL || src[i] == 0) + return -2; + ac += (p - lookup_table) << bits; + bits += 6; + if (bits >= 8) { + *cp++ = ac & 0xff; + ac >>= 8; + bits -= 8; + } + i++; + } + if (ac) + return -1; + return cp - dst; +} + +/** + * f2fs_fname_crypto_round_up() - + * + * Return: size rounded up to a multiple of blksize + */ +u32 f2fs_fname_crypto_round_up(u32 size, u32 blksize) +{ + return ((size + blksize - 1) / blksize) * blksize; +} + +/** + * f2fs_fname_crypto_alloc_buffer() - + * + * Allocates an output buffer that is sufficient for the crypto operation + * on a name of ilen bytes for the given inode. 
+ */ +int f2fs_fname_crypto_alloc_buffer(struct inode *inode, + u32 ilen, struct f2fs_str *crypto_str) +{ + unsigned int olen; + int padding = 16; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (ci) + padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK); + if (padding < F2FS_CRYPTO_BLOCK_SIZE) + padding = F2FS_CRYPTO_BLOCK_SIZE; + olen = f2fs_fname_crypto_round_up(ilen, padding); + crypto_str->len = olen; + if (olen < F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2) + olen = F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2; + /* Allocated buffer can hold one more character to null-terminate the + * string */ + crypto_str->name = kmalloc(olen + 1, GFP_NOFS); + if (!(crypto_str->name)) + return -ENOMEM; + return 0; +} + +/** + * f2fs_fname_crypto_free_buffer() - + * + * Frees the buffer allocated for crypto operation. + */ +void f2fs_fname_crypto_free_buffer(struct f2fs_str *crypto_str) +{ + if (!crypto_str) + return; + kfree(crypto_str->name); + crypto_str->name = NULL; +} + +/** + * f2fs_fname_disk_to_usr() - converts a filename from disk space to user space + */ +int f2fs_fname_disk_to_usr(struct inode *inode, + f2fs_hash_t *hash, + const struct f2fs_str *iname, + struct f2fs_str *oname) +{ + const struct qstr qname = FSTR_TO_QSTR(iname); + char buf[24]; + int ret; + + if (is_dot_dotdot(&qname)) { + oname->name[0] = '.'; + oname->name[iname->len - 1] = '.'; + oname->len = iname->len; + return oname->len; + } + + if (F2FS_I(inode)->i_crypt_info) + return f2fs_fname_decrypt(inode, iname, oname); + + if (iname->len <= F2FS_FNAME_CRYPTO_DIGEST_SIZE) { + ret = digest_encode(iname->name, iname->len, oname->name); + oname->len = ret; + return ret; + } + if (hash) { + memcpy(buf, hash, 4); + memset(buf + 4, 0, 4); + } else + memset(buf, 0, 8); + memcpy(buf + 8, iname->name + iname->len - 16, 16); + oname->name[0] = '_'; + ret = digest_encode(buf, 24, oname->name + 1); + oname->len = ret + 1; + return ret + 1; +} + +/** + * f2fs_fname_usr_to_disk() - converts a filename from 
user space to disk space + */ +int f2fs_fname_usr_to_disk(struct inode *inode, + const struct qstr *iname, + struct f2fs_str *oname) +{ + int res; + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (is_dot_dotdot(iname)) { + oname->name[0] = '.'; + oname->name[iname->len - 1] = '.'; + oname->len = iname->len; + return oname->len; + } + + if (ci) { + res = f2fs_fname_encrypt(inode, iname, oname); + return res; + } + /* Without a proper key, a user is not allowed to modify the filenames + * in a directory. Consequently, a user space name cannot be mapped to + * a disk-space name */ + return -EACCES; +} + +int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct f2fs_filename *fname) +{ + struct f2fs_crypt_info *ci; + int ret = 0, bigname = 0; + + memset(fname, 0, sizeof(struct f2fs_filename)); + fname->usr_fname = iname; + + if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) { + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; + } + ret = f2fs_get_encryption_info(dir); + if (ret) + return ret; + ci = F2FS_I(dir)->i_crypt_info; + if (ci) { + ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len, + &fname->crypto_buf); + if (ret < 0) + return ret; + ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf); + if (ret < 0) + goto errout; + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; + return 0; + } + if (!lookup) + return -EACCES; + + /* We don't have the key and we are doing a lookup; decode the + * user-supplied name + */ + if (iname->name[0] == '_') + bigname = 1; + if ((bigname && (iname->len != 33)) || + (!bigname && (iname->len > 43))) + return -ENOENT; + + fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); + if (fname->crypto_buf.name == NULL) + return -ENOMEM; + ret = digest_decode(iname->name + bigname, iname->len - bigname, + fname->crypto_buf.name); + if (ret < 0) { + ret = -ENOENT; + goto errout; + 
} + fname->crypto_buf.len = ret; + if (bigname) { + memcpy(&fname->hash, fname->crypto_buf.name, 4); + } else { + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; + } + return 0; +errout: + f2fs_fname_crypto_free_buffer(&fname->crypto_buf); + return ret; +} + +void f2fs_fname_free_filename(struct f2fs_filename *fname) +{ + kfree(fname->crypto_buf.name); + fname->crypto_buf.name = NULL; + fname->usr_fname = NULL; + fname->disk_name.name = NULL; +} diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c new file mode 100644 index 00000000..95b8f936 --- /dev/null +++ b/fs/f2fs/crypto_key.c @@ -0,0 +1,255 @@ +/* + * linux/fs/f2fs/crypto_key.c + * + * Copied from linux/fs/ext4/crypto_key.c + * + * Copyright (C) 2015, Google, Inc. + * + * This contains encryption key functions for f2fs + * + * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +static void derive_crypt_complete(struct crypto_async_request *req, int rc) +{ + struct f2fs_completion_result *ecr = req->data; + + if (rc == -EINPROGRESS) + return; + + ecr->res = rc; + complete(&ecr->completion); +} + +/** + * f2fs_derive_key_aes() - Derive a key using AES-128-ECB + * @deriving_key: Encryption key used for derivation. + * @source_key: Source key to which to apply derivation. + * @derived_key: Derived key. + * + * Return: Zero on success; non-zero otherwise. 
+ */ +static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE], + char source_key[F2FS_AES_256_XTS_KEY_SIZE], + char derived_key[F2FS_AES_256_XTS_KEY_SIZE]) +{ + int res = 0; + struct ablkcipher_request *req = NULL; + DECLARE_F2FS_COMPLETION_RESULT(ecr); + struct scatterlist src_sg, dst_sg; + struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0, + 0); + + if (IS_ERR(tfm)) { + res = PTR_ERR(tfm); + tfm = NULL; + goto out; + } + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + res = -ENOMEM; + goto out; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + derive_crypt_complete, &ecr); + res = crypto_ablkcipher_setkey(tfm, deriving_key, + F2FS_AES_128_ECB_KEY_SIZE); + if (res < 0) + goto out; + + sg_init_one(&src_sg, source_key, F2FS_AES_256_XTS_KEY_SIZE); + sg_init_one(&dst_sg, derived_key, F2FS_AES_256_XTS_KEY_SIZE); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, + F2FS_AES_256_XTS_KEY_SIZE, NULL); + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } +out: + if (req) + ablkcipher_request_free(req); + if (tfm) + crypto_free_ablkcipher(tfm); + return res; +} + +static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci) +{ + if (!ci) + return; + + if (ci->ci_keyring_key) + key_put(ci->ci_keyring_key); + crypto_free_ablkcipher(ci->ci_ctfm); + kmem_cache_free(f2fs_crypt_info_cachep, ci); +} + +void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *prev; + + if (ci == NULL) + ci = ACCESS_ONCE(fi->i_crypt_info); + if (ci == NULL) + return; + prev = cmpxchg(&fi->i_crypt_info, ci, NULL); + if (prev != ci) + return; + + f2fs_free_crypt_info(ci); +} + +int 
_f2fs_get_encryption_info(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_crypt_info *crypt_info; + char full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE + + (F2FS_KEY_DESCRIPTOR_SIZE * 2) + 1]; + struct key *keyring_key = NULL; + struct f2fs_encryption_key *master_key; + struct f2fs_encryption_context ctx; + struct user_key_payload *ukp; + struct crypto_ablkcipher *ctfm; + const char *cipher_str; + char raw_key[F2FS_MAX_KEY_SIZE]; + char mode; + int res; + + res = f2fs_crypto_initialize(); + if (res) + return res; +retry: + crypt_info = ACCESS_ONCE(fi->i_crypt_info); + if (crypt_info) { + if (!crypt_info->ci_keyring_key || + key_validate(crypt_info->ci_keyring_key) == 0) + return 0; + f2fs_free_encryption_info(inode, crypt_info); + goto retry; + } + + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + &ctx, sizeof(ctx), NULL); + if (res < 0) + return res; + else if (res != sizeof(ctx)) + return -EINVAL; + res = 0; + + crypt_info = kmem_cache_alloc(f2fs_crypt_info_cachep, GFP_NOFS); + if (!crypt_info) + return -ENOMEM; + + crypt_info->ci_flags = ctx.flags; + crypt_info->ci_data_mode = ctx.contents_encryption_mode; + crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; + crypt_info->ci_ctfm = NULL; + crypt_info->ci_keyring_key = NULL; + memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, + sizeof(crypt_info->ci_master_key)); + if (S_ISREG(inode->i_mode)) + mode = crypt_info->ci_data_mode; + else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + mode = crypt_info->ci_filename_mode; + else + BUG(); + + switch (mode) { + case F2FS_ENCRYPTION_MODE_AES_256_XTS: + cipher_str = "xts(aes)"; + break; + case F2FS_ENCRYPTION_MODE_AES_256_CTS: + cipher_str = "cts(cbc(aes))"; + break; + default: + printk_once(KERN_WARNING + "f2fs: unsupported key mode %d (ino %u)\n", + mode, (unsigned) inode->i_ino); + res = -ENOKEY; + goto out; + } + + memcpy(full_key_descriptor, 
F2FS_KEY_DESC_PREFIX, + F2FS_KEY_DESC_PREFIX_SIZE); + sprintf(full_key_descriptor + F2FS_KEY_DESC_PREFIX_SIZE, + "%*phN", F2FS_KEY_DESCRIPTOR_SIZE, + ctx.master_key_descriptor); + full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE + + (2 * F2FS_KEY_DESCRIPTOR_SIZE)] = '\0'; + keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); + if (IS_ERR(keyring_key)) { + res = PTR_ERR(keyring_key); + keyring_key = NULL; + goto out; + } + crypt_info->ci_keyring_key = keyring_key; + BUG_ON(keyring_key->type != &key_type_logon); + ukp = ((struct user_key_payload *)keyring_key->payload.data); + if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { + res = -EINVAL; + goto out; + } + master_key = (struct f2fs_encryption_key *)ukp->data; + BUILD_BUG_ON(F2FS_AES_128_ECB_KEY_SIZE != + F2FS_KEY_DERIVATION_NONCE_SIZE); + BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE); + res = f2fs_derive_key_aes(ctx.nonce, master_key->raw, + raw_key); + if (res) + goto out; + + ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? 
PTR_ERR(ctfm) : -ENOMEM; + printk(KERN_DEBUG + "%s: error %d (inode %u) allocating crypto tfm\n", + __func__, res, (unsigned) inode->i_ino); + goto out; + } + crypt_info->ci_ctfm = ctfm; + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + res = crypto_ablkcipher_setkey(ctfm, raw_key, + f2fs_encryption_key_size(mode)); + if (res) + goto out; + + memzero_explicit(raw_key, sizeof(raw_key)); + if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) { + f2fs_free_crypt_info(crypt_info); + goto retry; + } + return 0; + +out: + if (res == -ENOKEY && !S_ISREG(inode->i_mode)) + res = 0; + + f2fs_free_crypt_info(crypt_info); + memzero_explicit(raw_key, sizeof(raw_key)); + return res; +} + +int f2fs_has_encryption_key(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + return (fi->i_crypt_info != NULL); +} diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c new file mode 100644 index 00000000..d4a96af5 --- /dev/null +++ b/fs/f2fs/crypto_policy.c @@ -0,0 +1,209 @@ +/* + * copied from linux/fs/ext4/crypto_policy.c + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility. + * + * This contains encryption policy functions for f2fs with some modifications + * to support f2fs-specific xattr APIs. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. 
+ */ +#include +#include +#include +#include + +#include "f2fs.h" +#include "xattr.h" + +static int f2fs_inode_has_encryption_context(struct inode *inode) +{ + int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0, NULL); + return (res > 0); +} + +/* + * check whether the policy is consistent with the encryption context + * for the inode + */ +static int f2fs_is_encryption_context_consistent_with_policy( + struct inode *inode, const struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), NULL); + + if (res != sizeof(ctx)) + return 0; + + return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE) == 0 && + (ctx.flags == policy->flags) && + (ctx.contents_encryption_mode == + policy->contents_encryption_mode) && + (ctx.filenames_encryption_mode == + policy->filenames_encryption_mode)); +} + +static int f2fs_create_encryption_context_from_policy( + struct inode *inode, const struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + + ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1; + memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE); + + if (!f2fs_valid_contents_enc_mode(policy->contents_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid contents encryption mode %d\n", __func__, + policy->contents_encryption_mode); + return -EINVAL; + } + + if (!f2fs_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid filenames encryption mode %d\n", __func__, + policy->filenames_encryption_mode); + return -EINVAL; + } + + if (policy->flags & ~F2FS_POLICY_FLAGS_VALID) + return -EINVAL; + + ctx.contents_encryption_mode = policy->contents_encryption_mode; + ctx.filenames_encryption_mode = policy->filenames_encryption_mode; + ctx.flags 
= policy->flags; + BUILD_BUG_ON(sizeof(ctx.nonce) != F2FS_KEY_DERIVATION_NONCE_SIZE); + get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); + + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), NULL, XATTR_CREATE); +} + +int f2fs_process_policy(const struct f2fs_encryption_policy *policy, + struct inode *inode) +{ + if (policy->version != 0) + return -EINVAL; + + if (!S_ISDIR(inode->i_mode)) + return -EINVAL; + + if (!f2fs_inode_has_encryption_context(inode)) { + if (!f2fs_empty_dir(inode)) + return -ENOTEMPTY; + return f2fs_create_encryption_context_from_policy(inode, + policy); + } + + if (f2fs_is_encryption_context_consistent_with_policy(inode, policy)) + return 0; + + printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", + __func__); + return -EINVAL; +} + +int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy) +{ + struct f2fs_encryption_context ctx; + int res; + + if (!f2fs_encrypted_inode(inode)) + return -ENODATA; + + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + &ctx, sizeof(ctx), NULL); + if (res != sizeof(ctx)) + return -ENODATA; + if (ctx.format != F2FS_ENCRYPTION_CONTEXT_FORMAT_V1) + return -EINVAL; + + policy->version = 0; + policy->contents_encryption_mode = ctx.contents_encryption_mode; + policy->filenames_encryption_mode = ctx.filenames_encryption_mode; + policy->flags = ctx.flags; + memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE); + return 0; +} + +int f2fs_is_child_context_consistent_with_parent(struct inode *parent, + struct inode *child) +{ + struct f2fs_crypt_info *parent_ci, *child_ci; + int res; + + if ((parent == NULL) || (child == NULL)) { + pr_err("parent %p child %p\n", parent, child); + BUG_ON(1); + } + + /* no restrictions if the parent directory is not encrypted */ + if (!f2fs_encrypted_inode(parent)) + return 1; + /* if 
the child directory is not encrypted, this is always a problem */ + if (!f2fs_encrypted_inode(child)) + return 0; + res = f2fs_get_encryption_info(parent); + if (res) + return 0; + res = f2fs_get_encryption_info(child); + if (res) + return 0; + parent_ci = F2FS_I(parent)->i_crypt_info; + child_ci = F2FS_I(child)->i_crypt_info; + if (!parent_ci && !child_ci) + return 1; + if (!parent_ci || !child_ci) + return 0; + + return (memcmp(parent_ci->ci_master_key, + child_ci->ci_master_key, + F2FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags)); +} + +/** + * f2fs_inherit_context() - Sets a child context from its parent + * @parent: Parent inode from which the context is inherited. + * @child: Child inode that inherits the context from @parent. + * + * Return: Zero on success, -ENOKEY if @parent has no key, non-zero otherwise + */ +int f2fs_inherit_context(struct inode *parent, struct inode *child, + struct page *ipage) +{ + struct f2fs_encryption_context ctx; + struct f2fs_crypt_info *ci; + int res; + + res = f2fs_get_encryption_info(parent); + if (res < 0) + return res; + + ci = F2FS_I(parent)->i_crypt_info; + if (ci == NULL) + return -ENOKEY; /* lookup may return 0 with no key attached */ + + ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1; + ctx.contents_encryption_mode = ci->ci_data_mode; + ctx.filenames_encryption_mode = ci->ci_filename_mode; + ctx.flags = ci->ci_flags; + memcpy(ctx.master_key_descriptor, ci->ci_master_key, + F2FS_KEY_DESCRIPTOR_SIZE); + + get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); + return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, + sizeof(ctx), ipage, XATTR_CREATE); +} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e58c4cc..eb043c30 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -18,17 +18,32 @@ #include #include #include 
+#include +#include #include "f2fs.h" #include "node.h" #include "segment.h" +#include 
"trace.h" #include +static struct kmem_cache *extent_tree_slab; +static struct kmem_cache *extent_node_slab; + static void f2fs_read_end_io(struct bio *bio, int err) { struct bio_vec *bvec; int i; + if (f2fs_bio_encrypted(bio)) { + if (err) { + f2fs_release_crypto_ctx(bio->bi_private); + } else { + f2fs_end_io_crypto_work(bio->bi_private, bio); + return; + } + } + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; @@ -52,6 +67,8 @@ static void f2fs_write_end_io(struct bio *bio, int err) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; + f2fs_restore_and_release_control_page(&page); + if (unlikely(err)) { set_page_dirty(page); set_bit(AS_EIO, &page->mapping->flags); @@ -61,11 +78,6 @@ static void f2fs_write_end_io(struct bio *bio, int err) dec_page_count(sbi, F2FS_WRITEBACK); } - if (sbi->wait_io) { - complete(sbi->wait_io); - sbi->wait_io = NULL; - } - if (!get_pages(sbi, F2FS_WRITEBACK) && !list_empty(&sbi->cp_wait.task_list)) wake_up(&sbi->cp_wait); @@ -85,9 +97,9 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, bio = bio_alloc(GFP_NOIO, npages); bio->bi_bdev = sbi->sb->s_bdev; - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); + bio->bi_sector = SECTOR_FROM_BLOCK(blk_addr); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; - bio->bi_private = sbi; + bio->bi_private = is_read ? NULL : sbi; return bio; } @@ -95,34 +107,16 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; - int rw; if (!io->bio) return; - rw = fio->rw; - - if (is_read_io(rw)) { - trace_f2fs_submit_read_bio(io->sbi->sb, rw, - fio->type, io->bio); - submit_bio(rw, io->bio); - } else { - trace_f2fs_submit_write_bio(io->sbi->sb, rw, - fio->type, io->bio); - /* - * META_FLUSH is only from the checkpoint procedure, and we - * should wait this metadata bio for FS consistency. 
- */ - if (fio->type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - io->sbi->wait_io = &wait; - submit_bio(rw, io->bio); - wait_for_completion(&wait); - } else { - submit_bio(rw, io->bio); - } - } + if (is_read_io(fio->rw)) + trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); + else + trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); + submit_bio(fio->rw, io->bio); io->bio = NULL; } @@ -152,15 +146,16 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, * Fill the locked page with data located in the block address. * Return unlocked page. */ -int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, int rw) +int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; + struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; - trace_f2fs_submit_page_bio(page, blk_addr, rw); + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(fio, 0); /* Allocate a new bio */ - bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); + bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw)); if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { bio_put(bio); @@ -168,47 +163,51 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, return -EFAULT; } - submit_bio(rw, bio); + submit_bio(fio->rw, bio); return 0; } -void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, struct f2fs_io_info *fio) +void f2fs_submit_page_mbio(struct f2fs_io_info *fio) { + struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; bool is_read = is_read_io(fio->rw); + struct page *bio_page; io = is_read ? 
&sbi->read_io : &sbi->write_io[btype]; - verify_block_addr(sbi, blk_addr); + verify_block_addr(sbi, fio->blk_addr); down_write(&io->io_rwsem); if (!is_read) inc_page_count(sbi, F2FS_WRITEBACK); - if (io->bio && (io->last_block_in_bio != blk_addr - 1 || + if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 || io->fio.rw != fio->rw)) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { int bio_blocks = MAX_BIO_BLOCKS(sbi); - io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); + io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read); io->fio = *fio; } - if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < + bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + + if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { __submit_merged_bio(io); goto alloc_new; } - io->last_block_in_bio = blk_addr; + io->last_block_in_bio = fio->blk_addr; + f2fs_trace_ios(fio, 0); up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); + trace_f2fs_submit_page_mbio(fio->page, fio); } /* @@ -217,7 +216,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, * ->node_page * update block addresses in the node page */ -static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +void set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn; __le32 *addr_array; @@ -230,7 +229,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); - addr_array[ofs_in_node] = cpu_to_le32(new_addr); + addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); set_page_dirty(node_page); } @@ -245,8 +244,8 @@ int reserve_new_block(struct dnode_of_data *dn) trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); - __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; + set_data_blkaddr(dn); mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; @@ 
-257,9 +256,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; - /* if inode_page exists, index should be zero */ - f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); - err = get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; @@ -271,72 +267,49 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } -static int check_extent_cache(struct inode *inode, pgoff_t pgofs, - struct buffer_head *bh_result) +static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) { struct f2fs_inode_info *fi = F2FS_I(inode); pgoff_t start_fofs, end_fofs; block_t start_blkaddr; - if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return 0; - - read_lock(&fi->ext.ext_lock); + read_lock(&fi->ext_lock); if (fi->ext.len == 0) { - read_unlock(&fi->ext.ext_lock); - return 0; + read_unlock(&fi->ext_lock); + return false; } stat_inc_total_hit(inode->i_sb); start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; - start_blkaddr = fi->ext.blk_addr; + start_blkaddr = fi->ext.blk; if (pgofs >= start_fofs && pgofs <= end_fofs) { - unsigned int blkbits = inode->i_sb->s_blocksize_bits; - size_t count; - - clear_buffer_new(bh_result); - map_bh(bh_result, inode->i_sb, - start_blkaddr + pgofs - start_fofs); - count = end_fofs - pgofs + 1; - if (count < (UINT_MAX >> blkbits)) - bh_result->b_size = (count << blkbits); - else - bh_result->b_size = UINT_MAX; - + *ei = fi->ext; stat_inc_read_hit(inode->i_sb); - read_unlock(&fi->ext.ext_lock); - return 1; + read_unlock(&fi->ext_lock); + return true; } - read_unlock(&fi->ext.ext_lock); - return 0; + read_unlock(&fi->ext_lock); + return false; } -void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +static bool update_extent_info(struct inode *inode, pgoff_t fofs, + block_t blkaddr) { - struct f2fs_inode_info *fi = F2FS_I(dn->inode); - pgoff_t fofs, start_fofs, end_fofs; + struct f2fs_inode_info 
*fi = F2FS_I(inode); + pgoff_t start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; int need_update = true; - f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; - - /* Update the page address in the parent node */ - __set_data_blkaddr(dn, blk_addr); - - if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return; - - write_lock(&fi->ext.ext_lock); + write_lock(&fi->ext_lock); start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; - start_blkaddr = fi->ext.blk_addr; - end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; + start_blkaddr = fi->ext.blk; + end_blkaddr = fi->ext.blk + fi->ext.len - 1; /* Drop and initialize the matched extent */ if (fi->ext.len == 1 && fofs == start_fofs) @@ -344,24 +317,24 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) /* Initial extent */ if (fi->ext.len == 0) { - if (blk_addr != NULL_ADDR) { + if (blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk_addr = blk_addr; + fi->ext.blk = blkaddr; fi->ext.len = 1; } goto end_update; } /* Front merge */ - if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) { fi->ext.fofs--; - fi->ext.blk_addr--; + fi->ext.blk--; fi->ext.len++; goto end_update; } /* Back merge */ - if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) { fi->ext.len++; goto end_update; } @@ -373,8 +346,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) fi->ext.len = fofs - start_fofs; } else { fi->ext.fofs = fofs + 1; - fi->ext.blk_addr = start_blkaddr + - fofs - start_fofs + 1; + fi->ext.blk = start_blkaddr + fofs - start_fofs + 1; fi->ext.len -= fofs - start_fofs + 1; } } else { @@ -388,78 +360,580 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) need_update = true; } end_update: - write_unlock(&fi->ext.ext_lock); - if 
(need_update) - sync_inode_page(dn); - return; + write_unlock(&fi->ext_lock); + return need_update; } -struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) +static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_info *ei, + struct rb_node *parent, struct rb_node **p) { - struct address_space *mapping = inode->i_mapping; - struct dnode_of_data dn; - struct page *page; - int err; + struct extent_node *en; - page = find_get_page(mapping, index); - if (page && PageUptodate(page)) - return page; - f2fs_put_page(page, 0); + en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); + if (!en) + return NULL; - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) - return ERR_PTR(err); - f2fs_put_dnode(&dn); + en->ei = *ei; + INIT_LIST_HEAD(&en->list); - if (dn.data_blkaddr == NULL_ADDR) - return ERR_PTR(-ENOENT); + rb_link_node(&en->rb_node, parent, p); + rb_insert_color(&en->rb_node, &et->root); + et->count++; + atomic_inc(&sbi->total_ext_node); + return en; +} - /* By fallocate(), there is no cached page, but with NEW_ADDR */ - if (unlikely(dn.data_blkaddr == NEW_ADDR)) - return ERR_PTR(-EINVAL); +static void __detach_extent_node(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + rb_erase(&en->rb_node, &et->root); + et->count--; + atomic_dec(&sbi->total_ext_node); - page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); + if (et->cached_en == en) + et->cached_en = NULL; +} - if (PageUptodate(page)) { - unlock_page(page); - return page; +static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi, + nid_t ino) +{ + struct extent_tree *et; + + down_read(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) { + up_read(&sbi->extent_tree_lock); + return NULL; } + atomic_inc(&et->refcount); + up_read(&sbi->extent_tree_lock); - err = 
f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, - sync ? READ_SYNC : READA); - if (err) - return ERR_PTR(err); + return et; +} - if (sync) { - wait_on_page_locked(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 0); - return ERR_PTR(-EIO); +static struct extent_tree *__grab_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + nid_t ino = inode->i_ino; + + down_write(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) { + et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); + f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); + memset(et, 0, sizeof(struct extent_tree)); + et->ino = ino; + et->root = RB_ROOT; + et->cached_en = NULL; + rwlock_init(&et->lock); + atomic_set(&et->refcount, 0); + et->count = 0; + sbi->total_ext_tree++; + } + atomic_inc(&et->refcount); + up_write(&sbi->extent_tree_lock); + + return et; +} + +static struct extent_node *__lookup_extent_tree(struct extent_tree *et, + unsigned int fofs) +{ + struct rb_node *node = et->root.rb_node; + struct extent_node *en; + + if (et->cached_en) { + struct extent_info *cei = &et->cached_en->ei; + + if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) + return et->cached_en; + } + + while (node) { + en = rb_entry(node, struct extent_node, rb_node); + + if (fofs < en->ei.fofs) { + node = node->rb_left; + } else if (fofs >= en->ei.fofs + en->ei.len) { + node = node->rb_right; + } else { + et->cached_en = en; + return en; } } - return page; + return NULL; } -/* - * If it tries to access a hole, return an error. - * Because, the callers, functions in dir.c and GC, should be able to know - * whether this page exists or not. 
- */ -struct page *get_lock_data_page(struct inode *inode, pgoff_t index) +static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + struct extent_node *prev; + struct rb_node *node; + + node = rb_prev(&en->rb_node); + if (!node) + return NULL; + + prev = rb_entry(node, struct extent_node, rb_node); + if (__is_back_mergeable(&en->ei, &prev->ei)) { + en->ei.fofs = prev->ei.fofs; + en->ei.blk = prev->ei.blk; + en->ei.len += prev->ei.len; + __detach_extent_node(sbi, et, prev); + return prev; + } + return NULL; +} + +static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + struct extent_node *next; + struct rb_node *node; + + node = rb_next(&en->rb_node); + if (!node) + return NULL; + + next = rb_entry(node, struct extent_node, rb_node); + if (__is_front_mergeable(&en->ei, &next->ei)) { + en->ei.len += next->ei.len; + __detach_extent_node(sbi, et, next); + return next; + } + return NULL; +} + +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_info *ei, + struct extent_node **den) +{ + struct rb_node **p = &et->root.rb_node; + struct rb_node *parent = NULL; + struct extent_node *en; + + while (*p) { + parent = *p; + en = rb_entry(parent, struct extent_node, rb_node); + + if (ei->fofs < en->ei.fofs) { + if (__is_front_mergeable(ei, &en->ei)) { + f2fs_bug_on(sbi, !den); + en->ei.fofs = ei->fofs; + en->ei.blk = ei->blk; + en->ei.len += ei->len; + *den = __try_back_merge(sbi, et, en); + return en; + } + p = &(*p)->rb_left; + } else if (ei->fofs >= en->ei.fofs + en->ei.len) { + if (__is_back_mergeable(ei, &en->ei)) { + f2fs_bug_on(sbi, !den); + en->ei.len += ei->len; + *den = __try_front_merge(sbi, et, en); + return en; + } + p = &(*p)->rb_right; + } else { + f2fs_bug_on(sbi, 1); + } + } + + return __attach_extent_node(sbi, et, ei, parent, p); +} + +static unsigned int 
__free_extent_tree(struct f2fs_sb_info *sbi, + struct extent_tree *et, bool free_all) +{ + struct rb_node *node, *next; + struct extent_node *en; + unsigned int count = et->count; + + node = rb_first(&et->root); + while (node) { + next = rb_next(node); + en = rb_entry(node, struct extent_node, rb_node); + + if (free_all) { + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) + list_del_init(&en->list); + spin_unlock(&sbi->extent_lock); + } + + if (free_all || list_empty(&en->list)) { + __detach_extent_node(sbi, et, en); + kmem_cache_free(extent_node_slab, en); + } + node = next; + } + + return count - et->count; +} + +static void f2fs_init_extent_tree(struct inode *inode, + struct f2fs_extent *i_ext) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + struct extent_node *en; + struct extent_info ei; + + if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) + return; + + et = __grab_extent_tree(inode); + + write_lock(&et->lock); + if (et->count) + goto out; + + set_extent_info(&ei, le32_to_cpu(i_ext->fofs), + le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); + + en = __insert_extent_tree(sbi, et, &ei, NULL); + if (en) { + et->cached_en = en; + + spin_lock(&sbi->extent_lock); + list_add_tail(&en->list, &sbi->extent_list); + spin_unlock(&sbi->extent_lock); + } +out: + write_unlock(&et->lock); + atomic_dec(&et->refcount); +} + +static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + struct extent_node *en; + + trace_f2fs_lookup_extent_tree_start(inode, pgofs); + + et = __find_extent_tree(sbi, inode->i_ino); + if (!et) + return false; + + read_lock(&et->lock); + en = __lookup_extent_tree(et, pgofs); + if (en) { + *ei = en->ei; + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) + list_move_tail(&en->list, &sbi->extent_list); + spin_unlock(&sbi->extent_lock); + stat_inc_read_hit(sbi->sb); + } + 
stat_inc_total_hit(sbi->sb); + read_unlock(&et->lock); + + trace_f2fs_lookup_extent_tree_end(inode, pgofs, en); + + atomic_dec(&et->refcount); + return en ? true : false; +} + +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, + block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; + struct extent_node *den = NULL; + struct extent_info ei, dei; + unsigned int endofs; + + trace_f2fs_update_extent_tree(inode, fofs, blkaddr); + + et = __grab_extent_tree(inode); + + write_lock(&et->lock); + + /* 1. lookup and remove existing extent info in cache */ + en = __lookup_extent_tree(et, fofs); + if (!en) + goto update_extent; + + dei = en->ei; + __detach_extent_node(sbi, et, en); + + /* 2. if extent can be split more, split and insert the left part */ + if (dei.len > 1) { + /* insert left part of split extent into cache */ + if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) { + set_extent_info(&ei, dei.fofs, dei.blk, + fofs - dei.fofs); + en1 = __insert_extent_tree(sbi, et, &ei, NULL); + } + + /* insert right part of split extent into cache */ + endofs = dei.fofs + dei.len - 1; + if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) { + set_extent_info(&ei, fofs + 1, + fofs - dei.fofs + dei.blk, endofs - fofs); + en2 = __insert_extent_tree(sbi, et, &ei, NULL); + } + } + +update_extent: + /* 3. update extent in extent cache */ + if (blkaddr) { + set_extent_info(&ei, fofs, blkaddr, 1); + en3 = __insert_extent_tree(sbi, et, &ei, &den); + } + + /* 4. update in global extent list */ + spin_lock(&sbi->extent_lock); + if (en && !list_empty(&en->list)) + list_del(&en->list); + /* + * en1 and en2 split from en, they will become more and more smaller + * fragments after splitting several times. So if the length is smaller + * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree. 
+ */ + if (en1) + list_add_tail(&en1->list, &sbi->extent_list); + if (en2) + list_add_tail(&en2->list, &sbi->extent_list); + if (en3) { + if (list_empty(&en3->list)) + list_add_tail(&en3->list, &sbi->extent_list); + else + list_move_tail(&en3->list, &sbi->extent_list); + } + if (den && !list_empty(&den->list)) + list_del(&den->list); + spin_unlock(&sbi->extent_lock); + + /* 5. release extent node */ + if (en) + kmem_cache_free(extent_node_slab, en); + if (den) + kmem_cache_free(extent_node_slab, den); + + write_unlock(&et->lock); + atomic_dec(&et->refcount); +} + +void f2fs_preserve_extent_tree(struct inode *inode) +{ + struct extent_tree *et; + struct extent_info *ext = &F2FS_I(inode)->ext; + bool sync = false; + + if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + return; + + et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino); + if (!et) { + if (ext->len) { + ext->len = 0; + update_inode_page(inode); + } + return; + } + + read_lock(&et->lock); + if (et->count) { + struct extent_node *en; + + if (et->cached_en) { + en = et->cached_en; + } else { + struct rb_node *node = rb_first(&et->root); + + if (!node) + node = rb_last(&et->root); + en = rb_entry(node, struct extent_node, rb_node); + } + + if (__is_extent_same(ext, &en->ei)) + goto out; + + *ext = en->ei; + sync = true; + } else if (ext->len) { + ext->len = 0; + sync = true; + } +out: + read_unlock(&et->lock); + atomic_dec(&et->refcount); + + if (sync) + update_inode_page(inode); +} + +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; + struct extent_node *en, *tmp; + unsigned long ino = F2FS_ROOT_INO(sbi); + struct radix_tree_iter iter; + void **slot; + unsigned int found; + unsigned int node_cnt = 0, tree_cnt = 0; + + if (!test_opt(sbi, EXTENT_CACHE)) + return; + + if (available_free_memory(sbi, EXTENT_CACHE)) + return; + + spin_lock(&sbi->extent_lock); + list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { + if (!nr_shrink--) 
+ break; + list_del_init(&en->list); + } + spin_unlock(&sbi->extent_lock); + + down_read(&sbi->extent_tree_lock); + while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, + (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { + unsigned i; + + ino = treevec[found - 1]->ino + 1; + for (i = 0; i < found; i++) { + struct extent_tree *et = treevec[i]; + + atomic_inc(&et->refcount); + write_lock(&et->lock); + node_cnt += __free_extent_tree(sbi, et, false); + write_unlock(&et->lock); + atomic_dec(&et->refcount); + } + } + up_read(&sbi->extent_tree_lock); + + down_write(&sbi->extent_tree_lock); + radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter, + F2FS_ROOT_INO(sbi)) { + struct extent_tree *et = (struct extent_tree *)*slot; + + if (!atomic_read(&et->refcount) && !et->count) { + radix_tree_delete(&sbi->extent_tree_root, et->ino); + kmem_cache_free(extent_tree_slab, et); + sbi->total_ext_tree--; + tree_cnt++; + } + } + up_write(&sbi->extent_tree_lock); + + trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); +} + +void f2fs_destroy_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + unsigned int node_cnt = 0; + + if (!test_opt(sbi, EXTENT_CACHE)) + return; + + et = __find_extent_tree(sbi, inode->i_ino); + if (!et) + goto out; + + /* free all extent info belong to this extent tree */ + write_lock(&et->lock); + node_cnt = __free_extent_tree(sbi, et, true); + write_unlock(&et->lock); + + atomic_dec(&et->refcount); + + /* try to find and delete extent tree entry in radix tree */ + down_write(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); + if (!et) { + up_write(&sbi->extent_tree_lock); + goto out; + } + f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); + radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); + kmem_cache_free(extent_tree_slab, et); + sbi->total_ext_tree--; + up_write(&sbi->extent_tree_lock); +out: + 
trace_f2fs_destroy_extent_tree(inode, node_cnt); + return; +} + +void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext) +{ + if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + f2fs_init_extent_tree(inode, i_ext); + + write_lock(&F2FS_I(inode)->ext_lock); + get_extent_info(&F2FS_I(inode)->ext, *i_ext); + write_unlock(&F2FS_I(inode)->ext_lock); +} + +static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + return false; + + if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + return f2fs_lookup_extent_tree(inode, pgofs, ei); + + return lookup_extent_info(inode, pgofs, ei); +} + +void f2fs_update_extent_cache(struct dnode_of_data *dn) +{ + struct f2fs_inode_info *fi = F2FS_I(dn->inode); + pgoff_t fofs; + + f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); + + if (is_inode_flag_set(fi, FI_NO_EXTENT)) + return; + + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + + if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE)) + return f2fs_update_extent_tree(dn->inode, fofs, + dn->data_blkaddr); + + if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) + sync_inode_page(dn); +} + +struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; + struct extent_info ei; int err; + struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), + .type = DATA, + .rw = rw, + .encrypted_page = NULL, + }; + + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + return read_mapping_page(mapping, index, NULL); -repeat: page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + goto got_it; + } + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) { @@ -472,9 
+946,11 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) f2fs_put_page(page, 1); return ERR_PTR(-ENOENT); } - - if (PageUptodate(page)) +got_it: + if (PageUptodate(page)) { + unlock_page(page); return page; + } /* * A new dentry page is allocated but not able to be written, since its @@ -485,14 +961,58 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); + unlock_page(page); return page; } - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, - dn.data_blkaddr, READ_SYNC); + fio.blk_addr = dn.data_blkaddr; + fio.page = page; + err = f2fs_submit_page_bio(&fio); if (err) return ERR_PTR(err); + return page; +} + +struct page *find_data_page(struct inode *inode, pgoff_t index) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; + + page = find_get_page(mapping, index); + if (page && PageUptodate(page)) + return page; + f2fs_put_page(page, 0); + + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) + return page; + + if (PageUptodate(page)) + return page; + + wait_on_page_locked(page); + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); + } + return page; +} + +/* + * If it tries to access a hole, return an error. + * Because, the callers, functions in dir.c and GC, should be able to know + * whether this page exists or not. 
+ */ +struct page *get_lock_data_page(struct inode *inode, pgoff_t index) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; +repeat: + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) + return page; + /* wait for read completion */ lock_page(page); if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); @@ -520,42 +1040,37 @@ struct page *get_new_data_page(struct inode *inode, struct page *page; struct dnode_of_data dn; int err; +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, index); - if (err) + if (err) { + f2fs_put_page(page, 1); return ERR_PTR(err); -repeat: - page = grab_cache_page(mapping, index); - if (!page) { - err = -ENOMEM; - goto put_err; } + if (!ipage) + f2fs_put_dnode(&dn); if (PageUptodate(page)) - return page; + goto got_it; if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, - dn.data_blkaddr, READ_SYNC); - if (err) - goto put_err; + f2fs_put_page(page, 1); - lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - err = -EIO; - goto put_err; - } - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + page = get_read_data_page(inode, index, READ_SYNC); + if (IS_ERR(page)) goto repeat; - } - } + /* wait for read completion */ + lock_page(page); + } +got_it: if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); @@ -563,10 +1078,6 @@ struct page *get_new_data_page(struct inode *inode, set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); } return page; - -put_err: - f2fs_put_dnode(&dn); - return ERR_PTR(err); } static int __allocate_data_block(struct dnode_of_data *dn) @@ -574,30 +1085,32 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct 
f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; - block_t new_blkaddr; struct node_info ni; + int seg = CURSEG_WARM_DATA; pgoff_t fofs; - int type; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; + + dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + if (dn->data_blkaddr == NEW_ADDR) + goto alloc; + if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; - __set_data_blkaddr(dn, NEW_ADDR); - dn->data_blkaddr = NEW_ADDR; - +alloc: get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - type = CURSEG_WARM_DATA; + if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) + seg = CURSEG_DIRECT_IO; - allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); + allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, + &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ - set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); - update_extent_cache(new_blkaddr, dn); - clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); + set_data_blkaddr(dn); /* update i_size */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + @@ -605,39 +1118,97 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); - dn->data_blkaddr = new_blkaddr; return 0; } +static void __allocate_data_blocks(struct inode *inode, loff_t offset, + size_t count) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + u64 start = F2FS_BYTES_TO_BLK(offset); + u64 len = F2FS_BYTES_TO_BLK(count); + bool allocated; + u64 end_offset; + + while (len) { + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); + + /* When reading holes, we need its node page */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&dn, start, ALLOC_NODE)) + goto out; + 
+ allocated = false; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + + while (dn.ofs_in_node < end_offset && len) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { + if (__allocate_data_block(&dn)) + goto sync_out; + allocated = true; + } + len--; + start++; + dn.ofs_in_node++; + } + + if (allocated) + sync_inode_page(&dn); + + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + } + return; + +sync_out: + if (allocated) + sync_inode_page(&dn); + f2fs_put_dnode(&dn); +out: + f2fs_unlock_op(sbi); + return; +} + /* - * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. + * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with + * f2fs_map_blocks structure. * If original data blocks are allocated, then give them to blockdev. * Otherwise, * a. preallocate requested block addresses * b. do not use extent cache for better performance * c. give the block addresses to blockdev */ -static int __get_data_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create, bool fiemap) +static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, + int create, bool fiemap) { - unsigned int blkbits = inode->i_sb->s_blocksize_bits; - unsigned maxblocks = bh_result->b_size >> blkbits; + unsigned int maxblocks = map->m_len; struct dnode_of_data dn; int mode = create ? 
ALLOC_NODE : LOOKUP_NODE_RA; pgoff_t pgofs, end_offset; int err = 0, ofs = 1; + struct extent_info ei; bool allocated = false; - /* Get the page offset from the block offset(iblock) */ - pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); + map->m_len = 0; + map->m_flags = 0; - if (check_extent_cache(inode, pgofs, bh_result)) + /* it only supports block size == page size */ + pgofs = (pgoff_t)map->m_lblk; + + if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + map->m_pblk = ei.blk + pgofs - ei.fofs; + map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); + map->m_flags = F2FS_MAP_MAPPED; goto out; + } - if (create) { - f2fs_balance_fs(F2FS_I_SB(inode)); + if (create) f2fs_lock_op(F2FS_I_SB(inode)); - } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -651,19 +1222,23 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto put_out; if (dn.data_blkaddr != NULL_ADDR) { - map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + map->m_flags = F2FS_MAP_MAPPED; + map->m_pblk = dn.data_blkaddr; + if (dn.data_blkaddr == NEW_ADDR) + map->m_flags |= F2FS_MAP_UNWRITTEN; } else if (create) { err = __allocate_data_block(&dn); if (err) goto put_out; allocated = true; - map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED; + map->m_pblk = dn.data_blkaddr; } else { goto put_out; } end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); - bh_result->b_size = (((size_t)1) << blkbits); + map->m_len = 1; dn.ofs_in_node++; pgofs++; @@ -687,21 +1262,25 @@ static int __get_data_block(struct inode *inode, sector_t iblock, end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } - if (maxblocks > (bh_result->b_size >> blkbits)) { + if (maxblocks > map->m_len) { block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); if (blkaddr == NULL_ADDR && create) { err = __allocate_data_block(&dn); if (err) goto sync_out; allocated = true; + map->m_flags |= F2FS_MAP_NEW; 
blkaddr = dn.data_blkaddr; } /* Give more consecutive addresses for the readahead */ - if (blkaddr == (bh_result->b_blocknr + ofs)) { + if ((map->m_pblk != NEW_ADDR && + blkaddr == (map->m_pblk + ofs)) || + (map->m_pblk == NEW_ADDR && + blkaddr == NEW_ADDR)) { ofs++; dn.ofs_in_node++; pgofs++; - bh_result->b_size += (((size_t)1) << blkbits); + map->m_len++; goto get_next; } } @@ -714,10 +1293,28 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (create) f2fs_unlock_op(F2FS_I_SB(inode)); out: - trace_f2fs_get_data_block(inode, iblock, bh_result, err); + trace_f2fs_map_blocks(inode, map, err); return err; } +static int __get_data_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create, bool fiemap) +{ + struct f2fs_map_blocks map; + int ret; + + map.m_lblk = iblock; + map.m_len = bh->b_size >> inode->i_blkbits; + + ret = f2fs_map_blocks(inode, &map, create, fiemap); + if (!ret) { + map_bh(bh, inode->i_sb, map.m_pblk); + bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; + bh->b_size = map.m_len << inode->i_blkbits; + } + return ret; +} + static int get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -730,26 +1327,282 @@ static int get_data_block_fiemap(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, true); } +static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) +{ + return (offset >> inode->i_blkbits); +} + +static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) +{ + return (blk << inode->i_blkbits); +} + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { - return generic_block_fiemap(inode, fieinfo, - start, len, get_data_block_fiemap); + struct buffer_head map_bh; + sector_t start_blk, last_blk; + loff_t isize = i_size_read(inode); + u64 logical = 0, phys = 0, size = 0; + u32 flags = 0; + bool past_eof = false, whole_file = false; + 
int ret = 0; + + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + + if (len >= isize) { + whole_file = true; + len = isize; + } + + if (logical_to_blk(inode, len) == 0) + len = blk_to_logical(inode, 1); + + start_blk = logical_to_blk(inode, start); + last_blk = logical_to_blk(inode, start + len - 1); +next: + memset(&map_bh, 0, sizeof(struct buffer_head)); + map_bh.b_size = len; + + ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0); + if (ret) + goto out; + + /* HOLE */ + if (!buffer_mapped(&map_bh)) { + start_blk++; + + if (!past_eof && blk_to_logical(inode, start_blk) >= isize) + past_eof = 1; + + if (past_eof && size) { + flags |= FIEMAP_EXTENT_LAST; + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + } else if (size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + size = 0; + } + + /* if we have holes up to/past EOF then we're done */ + if (start_blk > last_blk || past_eof || ret) + goto out; + } else { + if (start_blk > last_blk && !whole_file) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + goto out; + } + + /* + * if size != 0 then we know we already have an extent + * to add, so add it. 
+ */ + if (size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + if (ret) + goto out; + } + + logical = blk_to_logical(inode, start_blk); + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = map_bh.b_size; + flags = 0; + if (buffer_unwritten(&map_bh)) + flags = FIEMAP_EXTENT_UNWRITTEN; + + start_blk += logical_to_blk(inode, size); + + /* + * If we are past the EOF, then we need to make sure as + * soon as we find a hole that the last extent we found + * is marked with FIEMAP_EXTENT_LAST + */ + if (!past_eof && logical + size >= isize) + past_eof = true; + } + cond_resched(); + if (fatal_signal_pending(current)) + ret = -EINTR; + else + goto next; +out: + if (ret == 1) + ret = 0; + + mutex_unlock(&inode->i_mutex); + return ret; +} + +/* + * This function was originally taken from fs/mpage.c, and customized for f2fs. + * Major change was from block_size == page_size in f2fs by default. + */ +static int f2fs_mpage_readpages(struct address_space *mapping, + struct list_head *pages, struct page *page, + unsigned nr_pages) +{ + struct bio *bio = NULL; + unsigned page_idx; + sector_t last_block_in_bio = 0; + struct inode *inode = mapping->host; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + sector_t block_in_file; + sector_t last_block; + sector_t last_block_in_file; + sector_t block_nr; + struct block_device *bdev = inode->i_sb->s_bdev; + struct f2fs_map_blocks map; + + map.m_pblk = 0; + map.m_lblk = 0; + map.m_len = 0; + map.m_flags = 0; + + for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { + + prefetchw(&page->flags); + if (pages) { + page = list_entry(pages->prev, struct page, lru); + list_del(&page->lru); + if (add_to_page_cache_lru(page, mapping, + page->index, GFP_KERNEL)) + goto next_page; + } + + block_in_file = (sector_t)page->index; + last_block = block_in_file + nr_pages; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + blkbits; + if (last_block > 
last_block_in_file) + last_block = last_block_in_file; + + /* + * Map blocks using the previous result first. + */ + if ((map.m_flags & F2FS_MAP_MAPPED) && + block_in_file > map.m_lblk && + block_in_file < (map.m_lblk + map.m_len)) + goto got_it; + + /* + * Then do more f2fs_map_blocks() calls until we are + * done with this page. + */ + map.m_flags = 0; + + if (block_in_file < last_block) { + map.m_lblk = block_in_file; + map.m_len = last_block - block_in_file; + + if (f2fs_map_blocks(inode, &map, 0, false)) + goto set_error_page; + } +got_it: + if ((map.m_flags & F2FS_MAP_MAPPED)) { + block_nr = map.m_pblk + block_in_file - map.m_lblk; + SetPageMappedToDisk(page); + + if (!PageUptodate(page) && !cleancache_get_page(page)) { + SetPageUptodate(page); + goto confused; + } + } else { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + unlock_page(page); + goto next_page; + } + + /* + * This page will go to BIO. Do we need to send this + * BIO off first? + */ + if (bio && (last_block_in_bio != block_nr - 1)) { +submit_and_realloc: + submit_bio(READ, bio); + bio = NULL; + } + if (bio == NULL) { + struct f2fs_crypto_ctx *ctx = NULL; + + if (f2fs_encrypted_inode(inode) && + S_ISREG(inode->i_mode)) { + struct page *cpage; + + ctx = f2fs_get_crypto_ctx(inode); + if (IS_ERR(ctx)) + goto set_error_page; + + /* wait the page to be moved by cleaning */ + cpage = find_lock_page( + META_MAPPING(F2FS_I_SB(inode)), + block_nr); + if (cpage) { + f2fs_wait_on_page_writeback(cpage, + DATA); + f2fs_put_page(cpage, 1); + } + } + + bio = bio_alloc(GFP_KERNEL, + min_t(int, nr_pages, bio_get_nr_vecs(bdev))); + if (!bio) { + if (ctx) + f2fs_release_crypto_ctx(ctx); + goto set_error_page; + } + bio->bi_bdev = bdev; + bio->bi_sector = SECTOR_FROM_BLOCK(block_nr); + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = ctx; + } + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + last_block_in_bio = block_nr; + goto next_page; 
+set_error_page: + SetPageError(page); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + unlock_page(page); + goto next_page; +confused: + if (bio) { + submit_bio(READ, bio); + bio = NULL; + } + unlock_page(page); +next_page: + if (pages) + page_cache_release(page); + } + BUG_ON(pages && !list_empty(pages)); + if (bio) + submit_bio(READ, bio); + return 0; } static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - int ret; + int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); - else - ret = mpage_readpage(page, get_data_block); - + if (ret == -EAGAIN) + ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1); return ret; } @@ -763,13 +1616,13 @@ static int f2fs_read_data_pages(struct file *file, if (f2fs_has_inline_data(inode)) return 0; - return mpage_readpages(mapping, pages, nr_pages, get_data_block); + return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } -int do_write_data_page(struct page *page, struct f2fs_io_info *fio) +int do_write_data_page(struct f2fs_io_info *fio) { + struct page *page = fio->page; struct inode *inode = page->mapping->host; - block_t old_blkaddr, new_blkaddr; struct dnode_of_data dn; int err = 0; @@ -778,11 +1631,21 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) if (err) return err; - old_blkaddr = dn.data_blkaddr; + fio->blk_addr = dn.data_blkaddr; /* This page is already truncated */ - if (old_blkaddr == NULL_ADDR) + if (fio->blk_addr == NULL_ADDR) { + ClearPageUptodate(page); goto out_writepage; + } + + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + fio->encrypted_page = f2fs_encrypt(inode, fio->page); + if (IS_ERR(fio->encrypted_page)) { + err = PTR_ERR(fio->encrypted_page); + goto out_writepage; + } + } set_page_writeback(page); @@ -790,15 +1653,20 @@ int do_write_data_page(struct page *page, 
struct f2fs_io_info *fio) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (unlikely(old_blkaddr != NEW_ADDR && + if (unlikely(fio->blk_addr != NEW_ADDR && !is_cold_data(page) && need_inplace_update(inode))) { - rewrite_data_page(page, old_blkaddr, fio); + rewrite_data_page(fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + trace_f2fs_do_write_data_page(page, IPU); } else { - write_data_page(page, &dn, &new_blkaddr, fio); - update_extent_cache(new_blkaddr, &dn); + write_data_page(&dn, fio); + set_data_blkaddr(&dn); + f2fs_update_extent_cache(&dn); + trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); } out_writepage: f2fs_put_dnode(&dn); @@ -817,8 +1685,11 @@ static int f2fs_write_data_page(struct page *page, bool need_balance_fs = false; int err = 0; struct f2fs_io_info fio = { + .sbi = sbi, .type = DATA, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? 
WRITE_SYNC : WRITE, + .page = page, + .encrypted_page = NULL, }; trace_f2fs_writepage(page, DATA); @@ -836,21 +1707,25 @@ static int f2fs_write_data_page(struct page *page, zero_user_segment(page, offset, PAGE_CACHE_SIZE); write: - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto redirty_out; + if (f2fs_is_drop_cache(inode)) + goto out; + if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && + available_free_memory(sbi, BASE_CHECK)) goto redirty_out; /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; - err = do_write_data_page(page, &fio); + err = do_write_data_page(&fio); goto done; } /* we should bypass data pages to proceed the kworkder jobs */ if (unlikely(f2fs_cp_error(sbi))) { SetPageError(page); - unlock_page(page); goto out; } @@ -859,11 +1734,12 @@ static int f2fs_write_data_page(struct page *page, else if (has_not_enough_free_secs(sbi, 0)) goto redirty_out; + err = -EAGAIN; f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) - err = f2fs_write_inline_data(inode, page, offset); - else - err = do_write_data_page(page, &fio); + if (f2fs_has_inline_data(inode)) + err = f2fs_write_inline_data(inode, page); + if (err == -EAGAIN) + err = do_write_data_page(&fio); f2fs_unlock_op(sbi); done: if (err && err != -ENOENT) @@ -872,6 +1748,8 @@ static int f2fs_write_data_page(struct page *page, clear_cold_data(page); out: inode_dec_dirty_pages(inode); + if (err) + ClearPageUptodate(page); unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); @@ -913,6 +1791,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; + /* during POR, we don't need to trigger writepage at all. 
*/ + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + diff = nr_pages_to_write(sbi, DATA, wbc); if (!S_ISDIR(inode->i_mode)) { @@ -940,7 +1822,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, inode->i_size); + truncate_pagecache(inode, 0, inode->i_size); truncate_blocks(inode, inode->i_size, true); } } @@ -951,7 +1833,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; + struct page *page, *ipage; pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; @@ -959,45 +1841,60 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); f2fs_balance_fs(sbi); -repeat: - err = f2fs_convert_inline_data(inode, pos + len, NULL); - if (err) - goto fail; + /* + * We should check this at this moment to avoid deadlock on inode page + * and #0 page. 
The locking rule for inline_data conversion should be: + * lock_page(page #0) -> lock_page(inode_page) + */ + if (index != 0) { + err = f2fs_convert_inline_inode(inode); + if (err) + goto fail; + } +repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { err = -ENOMEM; goto fail; } - /* to avoid latency during memory pressure */ - unlock_page(page); - *pagep = page; - if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) - goto inline_data; - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, index); - f2fs_unlock_op(sbi); - if (err) { - f2fs_put_page(page, 0); - goto fail; - } -inline_data: - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); - goto repeat; + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto unlock_fail; } - f2fs_wait_on_page_writeback(page, DATA); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) { + if (pos + len <= MAX_INLINE_DATA) { + read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); + goto put_next; + } + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto put_fail; + } + err = f2fs_reserve_block(&dn, index); + if (err) + goto put_fail; +put_next: + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; + f2fs_wait_on_page_writeback(page, DATA); + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { unsigned start = pos & (PAGE_CACHE_SIZE - 1); unsigned end = start + len; @@ -1010,18 +1907,17 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else { - err 
= f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); - if (err) - goto fail; - } + struct f2fs_io_info fio = { + .sbi = sbi, + .type = DATA, + .rw = READ_SYNC, + .blk_addr = dn.data_blkaddr, + .page = page, + .encrypted_page = NULL, + }; + err = f2fs_submit_page_bio(&fio); + if (err) + goto fail; lock_page(page); if (unlikely(!PageUptodate(page))) { @@ -1033,11 +1929,26 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_put_page(page, 1); goto repeat; } + + /* avoid symlink page */ + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + err = f2fs_decrypt_one(inode, page); + if (err) { + f2fs_put_page(page, 1); + goto fail; + } + } } out: SetPageUptodate(page); clear_cold_data(page); return 0; + +put_fail: + f2fs_put_dnode(&dn); +unlock_fail: + f2fs_unlock_op(sbi); + f2fs_put_page(page, 1); fail: f2fs_write_failed(mapping, pos + len); return err; @@ -1052,10 +1963,7 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) - register_inmem_page(inode, page); - else - set_page_dirty(page); + set_page_dirty(page); if (pos + copied > i_size_read(inode)) { i_size_write(inode, pos + copied); @@ -1068,9 +1976,10 @@ static int f2fs_write_end(struct file *file, } static int check_direct_IO(struct inode *inode, int rw, - struct iov_iter *iter, loff_t offset) + const struct iovec *iov, loff_t offset, unsigned long nr_segs) { unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; + int i; if (rw == READ) return 0; @@ -1078,31 +1987,42 @@ static int check_direct_IO(struct inode *inode, int rw, if (offset & blocksize_mask) return -EINVAL; - if (iov_iter_alignment(iter) & blocksize_mask) - return -EINVAL; - + for (i = 0; i < nr_segs; i++) + if (iov[i].iov_len & blocksize_mask) + return -EINVAL; return 0; } static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) + const struct 
iovec *iov, loff_t offset, + unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - size_t count = iov_iter_count(iter); + size_t count = iov_length(iov, nr_segs); int err; - /* Let buffer I/O handle the inline data case. */ - if (f2fs_has_inline_data(inode)) + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return 0; - if (check_direct_IO(inode, rw, iter, offset)) + if (check_direct_IO(inode, rw, iov, offset, nr_segs)) return 0; trace_f2fs_direct_IO_enter(inode, offset, count, rw); - err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); + if (rw & WRITE) + __allocate_data_blocks(inode, offset, count); + + err = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + get_data_block); if (err < 0 && (rw & WRITE)) f2fs_write_failed(mapping, offset + count); @@ -1111,21 +2031,31 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } -static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, - unsigned int length) +void f2fs_invalidate_page(struct page *page, unsigned long offset) { struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) + if (inode->i_ino >= F2FS_ROOT_INO(sbi) && (offset % PAGE_CACHE_SIZE)) return; - if (PageDirty(page)) - inode_dec_dirty_pages(inode); + if (PageDirty(page)) { + if (inode->i_ino == F2FS_META_INO(sbi)) + dec_page_count(sbi, F2FS_DIRTY_META); + else if (inode->i_ino == F2FS_NODE_INO(sbi)) + dec_page_count(sbi, F2FS_DIRTY_NODES); + else + inode_dec_dirty_pages(inode); + } ClearPagePrivate(page); } -static int f2fs_release_data_page(struct page *page, gfp_t wait) +int f2fs_release_page(struct page *page, gfp_t wait) { + /* 
If this is dirty page, keep PagePrivate */ + if (PageDirty(page)) + return 0; + ClearPagePrivate(page); return 1; } @@ -1138,6 +2068,12 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); SetPageUptodate(page); + + if (f2fs_is_atomic_file(inode)) { + register_inmem_page(inode, page); + return 1; + } + mark_inode_dirty(inode); if (!PageDirty(page)) { @@ -1152,12 +2088,50 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - if (f2fs_has_inline_data(inode)) - return 0; + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + /* Block number less than F2FS MAX BLOCKS */ + if (unlikely(block >= F2FS_I_SB(inode)->max_file_blocks)) + return -EFBIG; return generic_block_bmap(mapping, block, get_data_block); } +void init_extent_cache_info(struct f2fs_sb_info *sbi) +{ + INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); + init_rwsem(&sbi->extent_tree_lock); + INIT_LIST_HEAD(&sbi->extent_list); + spin_lock_init(&sbi->extent_lock); + sbi->total_ext_tree = 0; + atomic_set(&sbi->total_ext_node, 0); +} + +int __init create_extent_cache(void) +{ + extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", + sizeof(struct extent_tree)); + if (!extent_tree_slab) + return -ENOMEM; + extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", + sizeof(struct extent_node)); + if (!extent_node_slab) { + kmem_cache_destroy(extent_tree_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_extent_cache(void) +{ + kmem_cache_destroy(extent_node_slab); + kmem_cache_destroy(extent_tree_slab); +} + const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, .readpages = f2fs_read_data_pages, @@ -1166,8 +2140,8 @@ const struct address_space_operations f2fs_dblock_aops = { .write_begin = f2fs_write_begin, .write_end = f2fs_write_end, 
.set_page_dirty = f2fs_set_data_page_dirty, - .invalidatepage = f2fs_invalidate_data_page, - .releasepage = f2fs_release_data_page, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, .direct_IO = f2fs_direct_IO, .bmap = f2fs_bmap, }; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0a91ab81..75176e0d 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -35,17 +35,22 @@ static void update_general_status(struct f2fs_sb_info *sbi) /* validation check of the segment numbers */ si->hit_ext = sbi->read_hit_ext; si->total_ext = sbi->total_hit_ext; + si->ext_tree = sbi->total_ext_tree; + si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); + si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = valid_inode_count(sbi); - si->inline_inode = sbi->inline_inode; + si->inline_inode = atomic_read(&sbi->inline_inode); + si->inline_dir = atomic_read(&sbi->inline_dir); si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -55,7 +60,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->node_pages = NODE_MAPPING(sbi)->nrpages; si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; - si->sits = SIT_I(sbi)->dirty_sentries; + si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; + si->sits = MAIN_SEGS(sbi); + si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->fnids = NM_I(sbi)->fcnt; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -77,6 +84,8 @@ static void 
update_general_status(struct f2fs_sb_info *sbi) si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; } + + si->inplace_count = atomic_read(&sbi->inplace_count); } /* @@ -85,7 +94,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) static void update_sit_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; + unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; + unsigned long long bimodal, dist; unsigned int segno, vblocks; int ndirty = 0; @@ -103,10 +113,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi) ndirty++; } } - dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; - si->bimodal = bimodal / dist; + dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); + si->bimodal = div_u64(bimodal, dist); if (si->dirty_count) - si->avg_vblocks = total_vblocks / ndirty; + si->avg_vblocks = div_u64(total_vblocks, ndirty); else si->avg_vblocks = 0; } @@ -118,6 +128,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned npages; + int i; if (si->base_mem) goto get_cache; @@ -133,7 +144,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct sit_info); si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); - si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += 3 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); @@ -156,19 +168,35 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); +get_cache: + si->cache_mem = 0; + /* build gc */ - si->base_mem += sizeof(struct 
f2fs_gc_kthread); + if (sbi->gc_thread) + si->cache_mem += sizeof(struct f2fs_gc_kthread); + + /* build merge flush thread */ + if (SM_I(sbi)->cmd_control_info) + si->cache_mem += sizeof(struct flush_cmd_control); -get_cache: /* free nids */ - si->cache_mem = NM_I(sbi)->fcnt; - si->cache_mem += NM_I(sbi)->nat_cnt; + si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid); + si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); + si->cache_mem += NM_I(sbi)->dirty_nat_cnt * + sizeof(struct nat_entry_set); + si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); + si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); + for (i = 0; i <= UPDATE_INO; i++) + si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); + si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); + si->cache_mem += atomic_read(&sbi->total_ext_node) * + sizeof(struct extent_node); + + si->page_mem = 0; npages = NODE_MAPPING(sbi)->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; + si->page_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry); - si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + si->page_mem += npages << PAGE_CACHE_SHIFT; } static int stat_show(struct seq_file *s, void *v) @@ -200,6 +228,8 @@ static int stat_show(struct seq_file *s, void *v) si->valid_count - si->valid_node_count); seq_printf(s, " - Inline_data Inode: %u\n", si->inline_inode); + seq_printf(s, " - Inline_dentry Inode: %u\n", + si->inline_dir); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); @@ -236,22 +266,31 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "CP calls: %d\n", si->cp_count); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); - seq_printf(s, " - data segments : %d\n", si->data_segs); - 
seq_printf(s, " - node segments : %d\n", si->node_segs); - seq_printf(s, "Try to move %d blocks\n", si->tot_blks); - seq_printf(s, " - data blocks : %d\n", si->data_blks); - seq_printf(s, " - node blocks : %d\n", si->node_blks); + seq_printf(s, " - data segments : %d (%d)\n", + si->data_segs, si->bg_data_segs); + seq_printf(s, " - node segments : %d (%d)\n", + si->node_segs, si->bg_node_segs); + seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks, + si->bg_data_blks + si->bg_node_blks); + seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks, + si->bg_data_blks); + seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, + si->bg_node_blks); seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); + seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); + seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - inmem: %4d, wb: %4d\n", + si->inmem_pages, si->wb_pages); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", si->ndirty_dent, si->ndirty_dirs); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); - seq_printf(s, " - NATs: %9d\n - SITs: %9d\n", - si->nats, si->sits); + seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", + si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d\n", si->fnids); seq_puts(s, "\nDistribution of User Blocks:"); @@ -269,6 +308,7 @@ static int stat_show(struct seq_file *s, void *v) for (j = 0; j < si->util_free; j++) seq_putc(s, '-'); seq_puts(s, "]\n\n"); + seq_printf(s, "IPU: %u blocks\n", si->inplace_count); seq_printf(s, "SSR: %u blocks in %u segments\n", si->block_count[SSR], si->segment_count[SSR]); seq_printf(s, "LFS: %u blocks in %u segments\n", @@ -281,9 +321,14 @@ static int stat_show(struct seq_file *s, void *v) /* memory footprint */ update_mem_info(si->sbi); - seq_printf(s, "\nMemory: %u KB = static: 
%u + cached: %u\n", - (si->base_mem + si->cache_mem) >> 10, - si->base_mem >> 10, si->cache_mem >> 10); + seq_printf(s, "\nMemory: %u KB\n", + (si->base_mem + si->cache_mem + si->page_mem) >> 10); + seq_printf(s, " - static: %u KB\n", + si->base_mem >> 10); + seq_printf(s, " - cached: %u KB\n", + si->cache_mem >> 10); + seq_printf(s, " - paged : %u KB\n", + si->page_mem >> 10); } mutex_unlock(&f2fs_stat_mutex); return 0; @@ -321,6 +366,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; + atomic_set(&sbi->inline_inode, 0); + atomic_set(&sbi->inline_dir, 0); + atomic_set(&sbi->inplace_count, 0); + mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); mutex_unlock(&f2fs_stat_mutex); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b54f8714..7db480de 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -9,12 +9,17 @@ * published by the Free Software Foundation. */ #include +#include #include #include "f2fs.h" #include "node.h" #include "acl.h" #include "xattr.h" +#ifndef LOOKUP_NOCASE +#define LOOKUP_NOCASE 0 +#endif + static unsigned long dir_blocks(struct inode *inode) { return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) @@ -37,7 +42,7 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } -static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { +unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_UNKNOWN] = DT_UNKNOWN, [F2FS_FT_REG_FILE] = DT_REG, [F2FS_FT_DIR] = DT_DIR, @@ -59,9 +64,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, umode_t mode) { - umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } @@ -77,79 +81,107 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -static bool early_match_name(size_t namelen, 
f2fs_hash_t namehash, - struct f2fs_dir_entry *de) +static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, + struct f2fs_filename *fname, + f2fs_hash_t namehash, + int *max_slots, + struct page **res_page, + unsigned int flags) { - if (le16_to_cpu(de->name_len) != namelen) - return false; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; - if (de->hash_code != namehash) - return false; + dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - return true; + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + de = find_target_dentry(fname, namehash, max_slots, &d, flags); + if (de) + *res_page = dentry_page; + else + kunmap(dentry_page); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. + */ + f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0); + return de; } -static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct qstr *name, int *max_slots, - f2fs_hash_t namehash, struct page **res_page) +struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname, + f2fs_hash_t namehash, int *max_slots, + struct f2fs_dentry_ptr *d, unsigned int flags) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; - struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); - const void *dentry_bits = &dentry_blk->dentry_bitmap; int max_len = 0; + struct f2fs_str de_name = FSTR_INIT(NULL, 0); + struct f2fs_str *name = &fname->disk_name; - while (bit_pos < NR_DENTRY_IN_BLOCK) { - if (!test_bit_le(bit_pos, dentry_bits)) { - if (bit_pos == 0) - max_len = 1; - else if (!test_bit_le(bit_pos - 1, dentry_bits)) - max_len++; + if (max_slots) + *max_slots = 0; + while (bit_pos < d->max) { + if (!test_bit_le(bit_pos, d->bitmap)) { bit_pos++; + max_len++; continue; } - de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name->name, - 
name->len)) { - *res_page = dentry_page; + + de = &d->dentry[bit_pos]; + + /* encrypted case */ + de_name.name = d->filename[bit_pos]; + de_name.len = le16_to_cpu(de->name_len); + + /* show encrypted name */ + if (fname->hash) { + if (de->hash_code == fname->hash) goto found; - } + } else if (de_name.len == name->len && + de->hash_code == namehash && + !memcmp(de_name.name, name->name, name->len)) { + goto found; + } else if (flags & LOOKUP_NOCASE && de_name.len == name->len && + !strncasecmp(de_name.name, name->name, name->len)) { + goto found; } - if (max_len > *max_slots) { + + if (max_slots && max_len > *max_slots) *max_slots = max_len; - max_len = 0; - } + max_len = 0; - /* - * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs has occurred. - */ - f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); + /* remain bug on condition */ + if (unlikely(!de->name_len)) + d->max = -1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; - kunmap(dentry_page); found: - if (max_len > *max_slots) + if (max_slots && max_len > *max_slots) *max_slots = max_len; return de; } static struct f2fs_dir_entry *find_in_level(struct inode *dir, - unsigned int level, struct qstr *name, - f2fs_hash_t namehash, struct page **res_page) + unsigned int level, + struct f2fs_filename *fname, + struct page **res_page, + unsigned int flags) { - int s = GET_DENTRY_SLOTS(name->len); + struct qstr name = FSTR_TO_QSTR(&fname->disk_name); + int s = GET_DENTRY_SLOTS(name.len); unsigned int nbucket, nblock; unsigned int bidx, end_block; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; - int max_slots = 0; + int max_slots; + f2fs_hash_t namehash; + + namehash = f2fs_dentry_hash(&name); f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -162,14 +194,14 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, for (; bidx < end_block; bidx++) { /* no need to allocate new dentry pages 
to all the indices */ - dentry_page = find_data_page(dir, bidx, true); + dentry_page = find_data_page(dir, bidx); if (IS_ERR(dentry_page)) { room = true; continue; } - de = find_in_block(dentry_page, name, &max_slots, - namehash, res_page); + de = find_in_block(dentry_page, fname, namehash, &max_slots, + res_page, flags); if (de) break; @@ -192,32 +224,39 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, * and the entry itself. Page is returned mapped and unlocked. * Entry is guaranteed to be valid. */ -struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - struct qstr *child, struct page **res_page) +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct qstr *child, + struct page **res_page, unsigned int flags) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; - f2fs_hash_t name_hash; unsigned int max_depth; unsigned int level; + struct f2fs_filename fname; + int err; - if (npages == 0) + *res_page = NULL; + + err = f2fs_fname_setup_filename(dir, child, 1, &fname); + if (err) return NULL; - *res_page = NULL; + if (f2fs_has_inline_dentry(dir)) { + de = find_in_inline_dir(dir, &fname, res_page, flags); + goto out; + } + + if (npages == 0) + goto out; - name_hash = f2fs_dentry_hash(child); max_depth = F2FS_I(dir)->i_current_depth; for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, child, name_hash, res_page); + de = find_in_level(dir, level, &fname, res_page, flags); if (de) break; } - if (!de && F2FS_I(dir)->chash != name_hash) { - F2FS_I(dir)->chash = name_hash; - F2FS_I(dir)->clevel = level - 1; - } +out: + f2fs_fname_free_filename(&fname); return de; } @@ -227,6 +266,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) struct f2fs_dir_entry *de; struct f2fs_dentry_block *dentry_blk; + if (f2fs_has_inline_dentry(dir)) + return f2fs_parent_inline_dir(dir, p); + page = get_lock_data_page(dir, 0); if (IS_ERR(page)) return NULL; @@ -244,10 +286,10 @@ ino_t 
f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) struct f2fs_dir_entry *de; struct page *page; - de = f2fs_find_entry(dir, qstr, &page); + de = f2fs_find_entry(dir, qstr, &page, 0); if (de) { res = le32_to_cpu(de->ino); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } @@ -257,11 +299,12 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { + enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, DATA); + f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); - kunmap(page); + set_de_type(de, inode->i_mode); + f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -282,10 +325,14 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -int update_dent_inode(struct inode *inode, const struct qstr *name) +int update_dent_inode(struct inode *inode, struct inode *to, + const struct qstr *name) { struct page *page; + if (file_enc_name(to)) + return 0; + page = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(page)) return PTR_ERR(page); @@ -296,36 +343,48 @@ int update_dent_inode(struct inode *inode, const struct qstr *name) return 0; } +void do_make_empty_dir(struct inode *inode, struct inode *parent, + struct f2fs_dentry_ptr *d) +{ + struct f2fs_dir_entry *de; + + de = &d->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(d->filename[0], ".", 1); + set_de_type(de, inode->i_mode); + + de = &d->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(d->filename[1], "..", 2); + set_de_type(de, parent->i_mode); + + test_and_set_bit_le(0, (void *)d->bitmap); + 
test_and_set_bit_le(1, (void *)d->bitmap); +} + static int make_empty_dir(struct inode *inode, struct inode *parent, struct page *page) { struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; - struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; + + if (f2fs_has_inline_dentry(inode)) + return make_empty_inline_dir(inode, parent, page); dentry_page = get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap_atomic(dentry_page); - de = &dentry_blk->dentry[0]; - de->name_len = cpu_to_le16(1); - de->hash_code = 0; - de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); - set_de_type(de, inode); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + do_make_empty_dir(inode, parent, &d); - de = &dentry_blk->dentry[1]; - de->hash_code = 0; - de->name_len = cpu_to_le16(2); - de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); - set_de_type(de, inode); - - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); @@ -333,8 +392,8 @@ static int make_empty_dir(struct inode *inode, return 0; } -static struct page *init_inode_metadata(struct inode *inode, - struct inode *dir, const struct qstr *name) +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, + const struct qstr *name, struct page *dpage) { struct page *page; int err; @@ -350,13 +409,19 @@ static struct page *init_inode_metadata(struct inode *inode, goto error; } - err = f2fs_init_acl(inode, dir, page); + err = f2fs_init_acl(inode, dir, page, dpage); if (err) goto put_error; err = f2fs_init_security(inode, dir, name, page); if (err) goto put_error; + + if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { + err = f2fs_inherit_context(dir, inode, page); + if (err) + goto put_error; + } } else { page = get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) @@ 
-395,10 +460,10 @@ static struct page *init_inode_metadata(struct inode *inode, return ERR_PTR(err); } -static void update_parent_metadata(struct inode *dir, struct inode *inode, +void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); @@ -413,58 +478,88 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } -static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +int room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_start); - if (zero_start >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - zero_start); + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); if (zero_end - zero_start >= slots) return zero_start; bit_start = zero_end + 1; - if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + if (zero_end + 1 >= max_slots) + return max_slots; goto next; } +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, + const struct qstr *name, f2fs_hash_t name_hash, + unsigned int bit_pos) +{ + struct f2fs_dir_entry *de; + int slots = GET_DENTRY_SLOTS(name->len); + int i; + + de = &d->dentry[bit_pos]; + de->hash_code = name_hash; + de->name_len = cpu_to_le16(name->len); + 
memcpy(d->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(ino); + set_de_type(de, mode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); +} + /* * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). */ int __f2fs_add_link(struct inode *dir, const struct qstr *name, - struct inode *inode) + struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; f2fs_hash_t dentry_hash; - struct f2fs_dir_entry *de; unsigned int nbucket, nblock; - size_t namelen = name->len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; - int slots = GET_DENTRY_SLOTS(namelen); - struct page *page; - int err = 0; - int i; + struct f2fs_dentry_ptr d; + struct page *page = NULL; + struct f2fs_filename fname; + struct qstr new_name; + int slots, err; + + err = f2fs_fname_setup_filename(dir, name, 0, &fname); + if (err) + return err; + + new_name.name = fname_name(&fname); + new_name.len = fname_len(&fname); + + if (f2fs_has_inline_dentry(dir)) { + err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); + if (!err || err != -EAGAIN) + goto out; + else + err = 0; + } - dentry_hash = f2fs_dentry_hash(name); level = 0; + slots = GET_DENTRY_SLOTS(new_name.len); + dentry_hash = f2fs_dentry_hash(&new_name); + current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { level = F2FS_I(dir)->clevel; @@ -472,8 +567,10 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, } start: - if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) - return -ENOSPC; + if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) { + err = -ENOSPC; + goto out; + } /* Increase the depth, if required */ if (level == current_depth) @@ -487,11 +584,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = 
get_new_data_page(dir, NULL, block, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); + if (IS_ERR(dentry_page)) { + err = PTR_ERR(dentry_page); + goto out; + } dentry_blk = kmap(dentry_page); - bit_pos = room_for_filename(dentry_blk, slots); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; @@ -505,30 +605,33 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; + if (inode) { + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, &new_name, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + if (f2fs_encrypted_inode(dir)) + file_set_enc_name(inode); } - de = &dentry_blk->dentry[bit_pos]; - de->hash_code = dentry_hash; - de->name_len = cpu_to_le16(namelen); - memcpy(dentry_blk->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); - for (i = 0; i < slots; i++) - test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); + set_page_dirty(dentry_page); - /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); + if (inode) { + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + } update_parent_metadata(dir, inode, current_depth); fail: - up_write(&F2FS_I(inode)->i_sem); + if (inode) + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode_page(dir); @@ -536,6 +639,8 @@ int __f2fs_add_link(struct inode *dir, const struct qstr 
*name, } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); +out: + f2fs_fname_free_filename(&fname); return err; } @@ -545,7 +650,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL); + page = init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -560,19 +665,50 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) return err; } +void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + down_write(&F2FS_I(inode)->i_sem); + + if (S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + if (page) + update_inode(dir, page); + else + update_inode_page(dir); + } + inode->i_ctime = CURRENT_TIME; + + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + up_write(&F2FS_I(inode)->i_sem); + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); +} + /* * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. 
*/ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *inode) + struct inode *dir, struct inode *inode) { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; - struct inode *dir = page->mapping->host; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; + if (f2fs_has_inline_dentry(dir)) + return f2fs_delete_inline_entry(dentry, page, dir, inode); + lock_page(page); f2fs_wait_on_page_writeback(page, DATA); @@ -590,33 +726,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dir->i_ctime = dir->i_mtime = CURRENT_TIME; - if (inode) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - - down_write(&F2FS_I(inode)->i_sem); - - if (S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - update_inode_page(dir); - } - inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); - if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); - i_size_write(inode, 0); - } - up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); - - if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); - else - release_orphan_inode(sbi); - } + if (inode) + f2fs_drop_nlink(dir, inode, NULL); if (bit_pos == NR_DENTRY_IN_BLOCK) { truncate_hole(dir, page->index, page->index + 1); clear_page_dirty_for_io(page); + ClearPagePrivate(page); ClearPageUptodate(page); inode_dec_dirty_pages(dir); } @@ -628,9 +744,12 @@ bool f2fs_empty_dir(struct inode *dir) unsigned long bidx; struct page *dentry_page; unsigned int bit_pos; - struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); + if (f2fs_has_inline_dentry(dir)) + return f2fs_empty_inline_dir(dir); + for (bidx = 0; bidx < nblock; bidx++) { dentry_page = get_lock_data_page(dir, bidx); if (IS_ERR(dentry_page)) { @@ -640,7 +759,6 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); if (bidx == 0) bit_pos = 2; @@ -659,19 +777,89 @@ bool f2fs_empty_dir(struct inode *dir) return true; 
} -static int f2fs_readdir(struct file *file, struct dir_context *ctx) +bool f2fs_fill_dentries(struct file *file, void *dirent, filldir_t filldir, + struct f2fs_dentry_ptr *d, unsigned int n, unsigned int bit_pos, + struct f2fs_str *fstr) +{ + unsigned int start_bit_pos = bit_pos; + unsigned char d_type; + struct f2fs_dir_entry *de = NULL; + struct f2fs_str de_name = FSTR_INIT(NULL, 0); + unsigned char *types = f2fs_filetype_table; + int over; + + while (bit_pos < d->max) { + d_type = DT_UNKNOWN; + bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); + if (bit_pos >= d->max) + break; + + de = &d->dentry[bit_pos]; + + if (types && de->file_type < F2FS_FT_MAX) + d_type = types[de->file_type]; + + /* encrypted case */ + de_name.name = d->filename[bit_pos]; + de_name.len = le16_to_cpu(de->name_len); + + if (f2fs_encrypted_inode(d->inode)) { + int save_len = fstr->len; + int ret; + + ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code, + &de_name, fstr); + de_name = *fstr; + fstr->len = save_len; + if (ret < 0) + return true; + } + + over = filldir(dirent, de_name.name, de_name.len, + (n * d->max) + bit_pos, + le32_to_cpu(de->ino), d_type); + if (over) { + file->f_pos += bit_pos - start_bit_pos; + return true; + } + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + } + return false; +} + +static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) { + unsigned long pos = file->f_pos; + unsigned int bit_pos = 0; struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); - unsigned int bit_pos = 0; struct f2fs_dentry_block *dentry_blk = NULL; - struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; - unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); - unsigned char d_type = DT_UNKNOWN; + struct f2fs_dentry_ptr d; + struct f2fs_str fstr = FSTR_INIT(NULL, 0); + unsigned int n = 0; + int err = 0; - bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); 
+ if (f2fs_encrypted_inode(inode)) { + err = f2fs_get_encryption_info(inode); + if (err) + return err; + + err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN, + &fstr); + if (err < 0) + return err; + } + + if (f2fs_has_inline_dentry(inode)) { + err = f2fs_read_inline_dir(file, dirent, filldir, &fstr); + goto out; + } + + bit_pos = (pos % NR_DENTRY_IN_BLOCK); + n = (pos / NR_DENTRY_IN_BLOCK); /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) @@ -684,29 +872,14 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) continue; dentry_blk = kmap(dentry_page); - while (bit_pos < NR_DENTRY_IN_BLOCK) { - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_pos); - if (bit_pos >= NR_DENTRY_IN_BLOCK) - break; - - de = &dentry_blk->dentry[bit_pos]; - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; - if (!dir_emit(ctx, - dentry_blk->filename[bit_pos], - le16_to_cpu(de->name_len), - le32_to_cpu(de->ino), d_type)) - goto stop; - - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos; - } + + make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); + + if (f2fs_fill_dentries(file, dirent, filldir, &d, n, bit_pos, &fstr)) + goto stop; + bit_pos = 0; - ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; + file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); dentry_page = NULL; @@ -716,14 +889,18 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } - - return 0; +out: + f2fs_fname_crypto_free_buffer(&fstr); + return err; } const struct file_operations f2fs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = f2fs_readdir, + .readdir = f2fs_readdir, .fsync = f2fs_sync_file, .unlocked_ioctl = f2fs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = 
f2fs_compat_ioctl, +#endif }; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8171e80b..c604f8ef 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -28,7 +28,7 @@ do { \ if (unlikely(condition)) { \ WARN_ON(1); \ - sbi->need_fsck = true; \ + set_sbi_flag(sbi, SBI_NEED_FSCK); \ } \ } while (0) #define f2fs_down_write(x, y) down_write(x) @@ -46,8 +46,11 @@ #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 #define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define F2FS_MOUNT_INLINE_DATA 0x00000100 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 -#define F2FS_MOUNT_NOBARRIER 0x00000400 +#define F2FS_MOUNT_INLINE_DENTRY 0x00000200 +#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 +#define F2FS_MOUNT_NOBARRIER 0x00000800 +#define F2FS_MOUNT_FASTBOOT 0x00001000 +#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -67,6 +70,15 @@ struct f2fs_mount_info { unsigned int opt; }; +#define F2FS_FEATURE_ENCRYPT 0x0001 + +#define F2FS_HAS_FEATURE(sb, mask) \ + ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_SET_FEATURE(sb, mask) \ + F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask) +#define F2FS_CLEAR_FEATURE(sb, mask) \ + F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask) + #define CRCPOLY_LE 0xedb88320 static inline __u32 f2fs_crc32(void *buf, size_t len) @@ -98,10 +110,19 @@ enum { enum { CP_UMOUNT, + CP_FASTBOOT, CP_SYNC, + CP_RECOVERY, CP_DISCARD, }; +#define DEF_BATCHED_TRIM_SECTIONS 32 +#define BATCHED_TRIM_SEGMENTS(sbi) \ + (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) +#define BATCHED_TRIM_BLOCKS(sbi) \ + (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) +#define DEF_CP_INTERVAL 60 /* 60 secs */ + struct cp_control { int reason; __u64 trim_start; @@ -134,8 +155,14 @@ struct ino_entry { nid_t ino; /* inode number */ }; -/* for the list of directory inodes */ -struct dir_inode_entry { +/* + * for the list 
of directory inodes or gc inodes. + * NOTE: there are two slab users for this structure, if we add/modify/delete + * fields in structure for one of slab users, it may affect fields or size of + * other one, in this condition, it's better to split both of slab and related + * data structure. + */ +struct inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ }; @@ -194,11 +221,30 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, */ #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS +#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION +#define FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ + +/* + * Flags for going down operation used by FS_IOC_SHUTDOWN + */ +#define FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ +#define FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ +#define FS_GOING_DOWN_NOSYNC 0x2 /* going down */ +#define FS_GOING_STOP_GC 0x3 /* stopping all gc */ #define F2FS_IOCTL_MAGIC 0xf5 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) + +#define F2FS_IOC_SET_ENCRYPTION_POLICY \ + _IOR('f', 19, struct f2fs_encryption_policy) +#define F2FS_IOC_GET_ENCRYPTION_PWSALT \ + _IOW('f', 20, __u8[16]) +#define F2FS_IOC_GET_ENCRYPTION_POLICY \ + _IOW('f', 21, struct f2fs_encryption_policy) #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -211,6 +257,54 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, /* * For INODE and NODE manager */ +/* for directory operations */ +struct f2fs_str { + unsigned char *name; + u32 len; +}; + +struct f2fs_filename { + const struct qstr *usr_fname; + struct f2fs_str disk_name; + f2fs_hash_t hash; +#ifdef
CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_str crypto_buf; +#endif +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + +struct f2fs_dentry_ptr { + struct inode *inode; + const void *bitmap; + struct f2fs_dir_entry *dentry; + __u8 (*filename)[F2FS_SLOT_LEN]; + int max; +}; + +static inline void make_dentry_ptr(struct inode *inode, + struct f2fs_dentry_ptr *d, void *src, int type) +{ + d->inode = inode; + + if (type == 1) { + struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; + d->max = NR_DENTRY_IN_BLOCK; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } else { + struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; + d->max = NR_INLINE_DENTRY; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } +} + /* * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 * as its node offset to distinguish from index node blocks. 
@@ -231,14 +325,52 @@ enum { #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ +/* vector size for gang look-up from extent cache that consists of radix tree */ +#define EXT_TREE_VEC_SIZE 64 + /* for in-memory extent cache entry */ -#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ +#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ + +/* number of extent info in extent cache we try to shrink */ +#define EXTENT_CACHE_SHRINK_NUMBER 128 struct extent_info { - rwlock_t ext_lock; /* rwlock for consistency */ - unsigned int fofs; /* start offset in a file */ - u32 blk_addr; /* start block address of the extent */ - unsigned int len; /* length of the extent */ + unsigned int fofs; /* start offset in a file */ + u32 blk; /* start block address of the extent */ + unsigned int len; /* length of the extent */ +}; + +struct extent_node { + struct rb_node rb_node; /* rb node located in rb-tree */ + struct list_head list; /* node in global extent list of sbi */ + struct extent_info ei; /* extent info */ +}; + +struct extent_tree { + nid_t ino; /* inode number */ + struct rb_root root; /* root of extent info rb-tree */ + struct extent_node *cached_en; /* recently accessed extent node */ + rwlock_t lock; /* protect extent info rb-tree */ + atomic_t refcount; /* reference count of rb-tree */ + unsigned int count; /* # of extent node in rb-tree*/ +}; + +/* + * This structure is taken from ext4_map_blocks. + * + * Note that, however, f2fs uses NEW and MAPPED flags for f2fs_map_blocks(). 
+ */ +#define F2FS_MAP_NEW (1 << BH_New) +#define F2FS_MAP_MAPPED (1 << BH_Mapped) +#define F2FS_MAP_UNWRITTEN (1 << BH_Unwritten) +#define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED |\ + F2FS_MAP_UNWRITTEN) + +struct f2fs_map_blocks { + block_t m_pblk; + block_t m_lblk; + unsigned int m_len; + unsigned int m_flags; }; /* @@ -246,6 +378,29 @@ struct extent_info { */ #define FADVISE_COLD_BIT 0x01 #define FADVISE_LOST_PINO_BIT 0x02 +#define FADVISE_ENCRYPT_BIT 0x04 +#define FADVISE_ENC_NAME_BIT 0x08 + +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) +#define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT) +#define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT) +#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT) +#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT) +#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) + +/* Encryption algorithms */ +#define F2FS_ENCRYPTION_MODE_INVALID 0 +#define F2FS_ENCRYPTION_MODE_AES_256_XTS 1 +#define F2FS_ENCRYPTION_MODE_AES_256_GCM 2 +#define F2FS_ENCRYPTION_MODE_AES_256_CBC 3 +#define F2FS_ENCRYPTION_MODE_AES_256_CTS 4 + +#include "f2fs_crypto.h" #define DEF_DIR_LEVEL 0 @@ -267,30 +422,67 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ - struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + rwlock_t ext_lock; /* rwlock for single extent cache */ + struct inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct 
radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ + +#ifdef CONFIG_F2FS_FS_ENCRYPTION + /* Encryption params */ + struct f2fs_crypt_info *i_crypt_info; +#endif }; static inline void get_extent_info(struct extent_info *ext, struct f2fs_extent i_ext) { - write_lock(&ext->ext_lock); ext->fofs = le32_to_cpu(i_ext.fofs); - ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->blk = le32_to_cpu(i_ext.blk); ext->len = le32_to_cpu(i_ext.len); - write_unlock(&ext->ext_lock); } static inline void set_raw_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { - read_lock(&ext->ext_lock); i_ext->fofs = cpu_to_le32(ext->fofs); - i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->blk = cpu_to_le32(ext->blk); i_ext->len = cpu_to_le32(ext->len); - read_unlock(&ext->ext_lock); +} + +static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, + u32 blk, unsigned int len) +{ + ei->fofs = fofs; + ei->blk = blk; + ei->len = len; +} + +static inline bool __is_extent_same(struct extent_info *ei1, + struct extent_info *ei2) +{ + return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk && + ei1->len == ei2->len); +} + +static inline bool __is_extent_mergeable(struct extent_info *back, + struct extent_info *front) +{ + return (back->fofs + back->len == front->fofs && + back->blk + back->len == front->blk); +} + +static inline bool __is_back_mergeable(struct extent_info *cur, + struct extent_info *back) +{ + return __is_extent_mergeable(back, cur); +} + +static inline bool __is_front_mergeable(struct extent_info *cur, + struct extent_info *front) +{ + return __is_extent_mergeable(cur, front); } struct f2fs_nm_info { @@ -303,7 +495,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - rwlock_t 
nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -369,7 +561,8 @@ enum { CURSEG_HOT_NODE, /* direct node blocks of directory files */ CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ - NO_CHECK_TYPE + NO_CHECK_TYPE, + CURSEG_DIRECT_IO, /* to use for the direct IO path */ }; struct flush_cmd { @@ -408,6 +601,9 @@ struct f2fs_sm_info { int nr_discards; /* # of discards in the list */ int max_discards; /* max. discards to be issued */ + /* for batched trimming */ + unsigned int trim_sections; /* # of sections to trim */ + struct list_head sit_entry_set; /* sit entry set list */ unsigned int ipu_policy; /* in-place-update policy */ @@ -433,6 +629,7 @@ enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_NODES, F2FS_DIRTY_META, + F2FS_INMEM_PAGES, NR_COUNT_TYPE, }; @@ -454,11 +651,19 @@ enum page_type { META, NR_PAGE_TYPE, META_FLUSH, + INMEM, /* the below types are used by tracepoints only. 
*/ + INMEM_DROP, + IPU, + OPU, }; struct f2fs_io_info { + struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ + block_t blk_addr; /* block address to be written */ + struct page *page; /* page to be written */ + struct page *encrypted_page; /* encrypted page */ }; #define is_read_io(rw) (((rw) & 1) == READ) @@ -470,13 +675,29 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ }; +/* for inner inode cache management */ +struct inode_management { + struct radix_tree_root ino_root; /* ino entry array */ + spinlock_t ino_lock; /* for ino entry lock */ + struct list_head ino_list; /* inode list head */ + unsigned long ino_num; /* number of entries */ +}; + +/* For s_flag in struct f2fs_sb_info */ +enum { + SBI_IS_DIRTY, /* dirty flag for checkpoint */ + SBI_IS_CLOSE, /* specify unmounting */ + SBI_NEED_FSCK, /* need fsck.f2fs to fix */ + SBI_POR_DOING, /* recovery is doing or not */ + SBI_NO_GC, /* disable f2fs gc */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ - int s_dirty; /* dirty flag for checkpoint */ - bool need_fsck; /* need fsck.f2fs to fix */ + int s_flag; /* flags for sbi */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ @@ -488,7 +709,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ - struct completion *wait_io; /* for completion bios */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ @@ -497,22 +717,26 @@ struct f2fs_sb_info { struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore 
node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ - bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; + long cp_expires, cp_interval; /* next expected periodic cp */ - /* for inode management */ - struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ - spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ - struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ /* for orphan inode, use 0'th array */ - unsigned int n_orphans; /* # of orphan inodes */ unsigned int max_orphans; /* max orphan inodes */ /* for directory inode management */ struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ + /* for extent tree cache */ + struct radix_tree_root extent_tree_root;/* cache extent cache entries */ + struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ + struct list_head extent_list; /* lru list for shrinker */ + spinlock_t extent_lock; /* locking extent lru list */ + int total_ext_tree; /* extent tree count */ + atomic_t total_ext_node; /* extent info count */ + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ @@ -528,12 +752,14 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ unsigned int total_valid_inode_count; /* valid inode count */ + loff_t max_file_blocks; /* max block index of file */ int active_logs; /* # of active logs */ int dir_level; /* directory level */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t discard_blks; /* discard command candidates */ block_t last_valid_block_count; /* for recovery */ u32
s_next_generation; /* for NFS support */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ @@ -556,8 +782,10 @@ struct f2fs_sb_info { struct f2fs_stat_info *stat_info; /* FS status information */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ + atomic_t inplace_count; /* # of inplace update */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ - int inline_inode; /* # of inline_data inodes */ + atomic_t inline_inode; /* # of inline_data inodes */ + atomic_t inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -652,14 +880,19 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) return sbi->node_inode->i_mapping; } -static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) +{ + return sbi->s_flag & (0x01 << type); +} + +static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { - sbi->s_dirty = 1; + sbi->s_flag |= (0x01 << type); } -static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { - sbi->s_dirty = 0; + sbi->s_flag &= ~(0x01 << type); } static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) @@ -707,6 +940,28 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) up_write(&sbi->cp_rwsem); } +static inline int __get_cp_reason(struct f2fs_sb_info *sbi) +{ + int reason = CP_SYNC; + + if (test_opt(sbi, FASTBOOT)) + reason = CP_FASTBOOT; + if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) + reason = CP_UMOUNT; + return reason; +} + +static inline bool __remain_node_summaries(int reason) +{ + return (reason == CP_UMOUNT || reason == CP_FASTBOOT); +} + +static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) +{ + return 
(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) || + is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG)); +} + /* * Check whether the given nid is within node id range. */ @@ -771,7 +1026,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { atomic_inc(&sbi->nr_pages[count_type]); - F2FS_SET_SB_DIRT(sbi); + set_sbi_flag(sbi, SBI_IS_DIRTY); } static inline void inode_inc_dirty_pages(struct inode *inode) @@ -833,12 +1088,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) return 0; } +static inline block_t __cp_payload(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); +} + static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); int offset; - if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { + if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; else @@ -988,6 +1248,13 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } +static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + while (radix_tree_insert(root, index, item)) + cond_resched(); +} + #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) static inline bool IS_INODE(struct page *page) @@ -1020,7 +1287,25 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) return mask & *addr; } -static inline int f2fs_set_bit(unsigned int nr, char *addr) +static inline void f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr |= mask; +} + +static inline void f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr &= ~mask; +} + +static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) { int mask; int ret; 
@@ -1032,7 +1317,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr) return ret; } -static inline int f2fs_clear_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1044,6 +1329,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) return ret; } +static inline void f2fs_change_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr ^= mask; +} + /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -1057,11 +1351,16 @@ enum { FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ FI_APPEND_WRITE, /* inode has appended data */ FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ + FI_DROP_CACHE, /* drop dirty page cache */ + FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1087,15 +1386,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) set_inode_flag(fi, FI_ACL_MODE); } -static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - clear_inode_flag(fi, FI_ACL_MODE); - return 1; - } - return 0; -} - static inline void get_inline_info(struct f2fs_inode_info *fi, struct f2fs_inode *ri) { @@ -1103,6 +1393,12 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_XATTR); if (ri->i_inline & F2FS_INLINE_DATA) set_inode_flag(fi, FI_INLINE_DATA); + if (ri->i_inline & F2FS_INLINE_DENTRY) + 
set_inode_flag(fi, FI_INLINE_DENTRY); + if (ri->i_inline & F2FS_DATA_EXIST) + set_inode_flag(fi, FI_DATA_EXIST); + if (ri->i_inline & F2FS_INLINE_DOTS) + set_inode_flag(fi, FI_INLINE_DOTS); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1114,6 +1410,12 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_XATTR; if (is_inode_flag_set(fi, FI_INLINE_DATA)) ri->i_inline |= F2FS_INLINE_DATA; + if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) + ri->i_inline |= F2FS_INLINE_DENTRY; + if (is_inode_flag_set(fi, FI_DATA_EXIST)) + ri->i_inline |= F2FS_DATA_EXIST; + if (is_inode_flag_set(fi, FI_INLINE_DOTS)) + ri->i_inline |= F2FS_INLINE_DOTS; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1148,6 +1450,22 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); } +static inline void f2fs_clear_inline_inode(struct inode *inode) +{ + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST); +} + +static inline int f2fs_exist_data(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); +} + +static inline int f2fs_has_inline_dots(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1158,12 +1476,48 @@ static inline bool f2fs_is_volatile_file(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); } +static inline bool f2fs_is_first_block_written(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); +} + +static inline bool f2fs_is_drop_cache(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); +} + static inline void *inline_data_addr(struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); return (void *)&(ri->i_addr[1]); } 
+static inline int f2fs_has_inline_dentry(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); +} + +static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) +{ + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); +} + +static inline int is_file(struct inode *inode, int type) +{ + return F2FS_I(inode)->i_advise & type; +} + +static inline void set_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise |= type; +} + +static inline void clear_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise &= ~type; +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; @@ -1180,6 +1534,17 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) sbi->sb->s_flags |= MS_RDONLY; } +static inline bool is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + #define get_inode_mode(i) \ ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? 
\ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -1220,32 +1585,53 @@ void handle_failed_inode(struct inode *); * namei.c */ struct dentry *f2fs_get_parent(struct dentry *child); +#ifdef CONFIG_F2FS_EMULATED_SD +void f2fs_set_nocase_dop(struct inode *inode); +#endif /* * dir.c */ +extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; +void set_de_type(struct f2fs_dir_entry *, umode_t); +struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *, + f2fs_hash_t, int *, struct f2fs_dentry_ptr *, + unsigned int); +bool f2fs_fill_dentries(struct file *, void *, filldir_t, + struct f2fs_dentry_ptr *, unsigned int, unsigned int, struct f2fs_str *); +void do_make_empty_dir(struct inode *, struct inode *, + struct f2fs_dentry_ptr *); +struct page *init_inode_metadata(struct inode *, struct inode *, + const struct qstr *, struct page *); +void update_parent_metadata(struct inode *, struct inode *, unsigned int); +int room_for_filename(const void *, int, int); +void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, - struct page **); + struct page **, unsigned int flags); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); -int update_dent_inode(struct inode *, const struct qstr *); -int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int update_dent_inode(struct inode *, struct inode *, const struct qstr *); +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, + const struct qstr *, f2fs_hash_t , unsigned int); +int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, + umode_t); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, + struct inode 
*); int f2fs_do_tmpfile(struct inode *, struct inode *); -int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, - inode); + inode, inode->i_ino, inode->i_mode); } /* * super.c */ +int f2fs_commit_super(struct f2fs_sb_info *, bool); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); @@ -1262,8 +1648,8 @@ struct dnode_of_data; struct node_info; bool available_free_memory(struct f2fs_sb_info *, int); +int need_dentry_mark(struct f2fs_sb_info *, nid_t); bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); -bool has_fsynced_inode(struct f2fs_sb_info *, nid_t); bool need_inode_block_update(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); @@ -1304,21 +1690,20 @@ int create_flush_cmd_control(struct f2fs_sb_info *); void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); -void clear_prefree_segments(struct f2fs_sb_info *); +void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); void release_discard_addrs(struct f2fs_sb_info *); void discard_next_dnode(struct f2fs_sb_info *, block_t); -int npages_for_summary_flush(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *, bool); void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +void update_meta_page(struct f2fs_sb_info *, void *, block_t); void write_meta_page(struct f2fs_sb_info *, struct page *); -void write_node_page(struct f2fs_sb_info *, struct page *, - struct 
f2fs_io_info *, unsigned int, block_t, block_t *); -void write_data_page(struct page *, struct dnode_of_data *, block_t *, - struct f2fs_io_info *); -void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); -void recover_data_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, block_t, block_t); +void write_node_page(unsigned int, struct f2fs_io_info *); +void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); +void rewrite_data_page(struct f2fs_io_info *); +void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *, + block_t, block_t, unsigned char, bool); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type); @@ -1337,8 +1722,9 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); +bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); +void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); @@ -1363,17 +1749,27 @@ void destroy_checkpoint_caches(void); * data.c */ void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); -int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); -void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, - struct f2fs_io_info *); +int f2fs_submit_page_bio(struct f2fs_io_info *); +void f2fs_submit_page_mbio(struct f2fs_io_info *); +void set_data_blkaddr(struct dnode_of_data *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, 
pgoff_t); -void update_extent_cache(block_t, struct dnode_of_data *); -struct page *find_data_page(struct inode *, pgoff_t, bool); +void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); +void f2fs_destroy_extent_tree(struct inode *); +void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); +void f2fs_update_extent_cache(struct dnode_of_data *); +void f2fs_preserve_extent_tree(struct inode *); +struct page *get_read_data_page(struct inode *, pgoff_t, int); +struct page *find_data_page(struct inode *, pgoff_t); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int do_write_data_page(struct page *, struct f2fs_io_info *); +int do_write_data_page(struct f2fs_io_info *); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); +void f2fs_invalidate_page(struct page *, unsigned long); +void init_extent_cache_info(struct f2fs_sb_info *); +int __init create_extent_cache(void); +void destroy_extent_cache(void); +int f2fs_release_page(struct page *, gfp_t); /* * gc.c @@ -1383,8 +1779,6 @@ void stop_gc_thread(struct f2fs_sb_info *); block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); int f2fs_gc(struct f2fs_sb_info *); void build_gc_manager(struct f2fs_sb_info *); -int __init create_gc_caches(void); -void destroy_gc_caches(void); /* * recovery.c @@ -1401,11 +1795,11 @@ struct f2fs_stat_info { struct f2fs_sb_info *sbi; int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; int main_area_segs, main_area_sections, main_area_zones; - int hit_ext, total_ext; + int hit_ext, total_ext, ext_tree, ext_node; int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; - int nats, sits, fnids; + int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; - int bg_gc, inline_inode; + int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, 
avg_vblocks; int util_free, util_valid, util_invalid; @@ -1413,14 +1807,17 @@ struct f2fs_stat_info { int dirty_count, node_pages, meta_pages; int prefree_count, call_count, cp_count; int tot_segs, node_segs, data_segs, free_segs, free_secs; + int bg_node_segs, bg_data_segs; int tot_blks, data_blks, node_blks; + int bg_data_blks, bg_node_blks; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; unsigned int segment_count[2]; unsigned int block_count[2]; - unsigned base_mem, cache_mem; + unsigned int inplace_count; + unsigned base_mem, cache_mem, page_mem; }; static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) @@ -1438,44 +1835,59 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode++); \ + (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \ } while (0) #define stat_dec_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode--); \ + (atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \ + } while (0) +#define stat_inc_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \ + } while (0) +#define stat_dec_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) - #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ ((sbi)->block_count[(curseg)->alloc_type]++) - -#define stat_inc_seg_count(sbi, type) \ +#define stat_inc_inplace_blocks(sbi) \ + (atomic_inc(&(sbi)->inplace_count)) +#define stat_inc_seg_count(sbi, type, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ (si)->tot_segs++; \ - if (type == SUM_TYPE_DATA) \ + if (type == SUM_TYPE_DATA) { \ si->data_segs++; \ - else \ + si->bg_data_segs += (gc_type == BG_GC) ? 
1 : 0; \ + } else { \ si->node_segs++; \ + si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \ + } \ } while (0) #define stat_inc_tot_blk_count(si, blks) \ (si->tot_blks += (blks)) -#define stat_inc_data_blk_count(sbi, blks) \ +#define stat_inc_data_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->data_blks += (blks); \ + si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \ } while (0) -#define stat_inc_node_blk_count(sbi, blks) \ +#define stat_inc_node_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->node_blks += (blks); \ + si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \ } while (0) int f2fs_build_stats(struct f2fs_sb_info *); @@ -1492,12 +1904,15 @@ void f2fs_destroy_root_stats(void); #define stat_inc_read_hit(sb) #define stat_inc_inline_inode(inode) #define stat_dec_inline_inode(inode) +#define stat_inc_inline_dir(inode) +#define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) -#define stat_inc_seg_count(si, type) +#define stat_inc_inplace_blocks(sbi) +#define stat_inc_seg_count(sbi, type, gc_type) #define stat_inc_tot_blk_count(si, blks) -#define stat_inc_data_blk_count(si, blks) -#define stat_inc_node_blk_count(sbi, blks) +#define stat_inc_data_blk_count(sbi, blks, gc_type) +#define stat_inc_node_blk_count(sbi, blks, gc_type) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } @@ -1513,15 +1928,161 @@ extern const struct address_space_operations f2fs_node_aops; extern const struct address_space_operations f2fs_meta_aops; extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_encrypted_symlink_inode_operations; extern const struct 
inode_operations f2fs_special_inode_operations; +extern struct kmem_cache *inode_entry_slab; /* * inline.c */ -bool f2fs_may_inline(struct inode *); +bool f2fs_may_inline_data(struct inode *); +bool f2fs_may_inline_dentry(struct inode *); +void read_inline_data(struct page *, struct page *); +bool truncate_inline_inode(struct page *, u64); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); -int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); -void truncate_inline_data(struct inode *, u64); +int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); +int f2fs_convert_inline_inode(struct inode *); +int f2fs_write_inline_data(struct inode *, struct page *); bool recover_inline_data(struct inode *, struct page *); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *, + struct f2fs_filename *, struct page **, unsigned int); +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); +int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, + nid_t, umode_t); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, + struct inode *, struct inode *); +bool f2fs_empty_inline_dir(struct inode *); +int f2fs_read_inline_dir(struct file *, void *, filldir_t, struct f2fs_str *); + +/* + * crypto support + */ +static inline int f2fs_encrypted_inode(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return file_is_encrypt(inode); +#else + return 0; +#endif +} + +static inline void f2fs_set_encrypted_inode(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + file_set_encrypt(inode); +#endif +} + +static inline bool f2fs_bio_encrypted(struct bio *bio) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return unlikely(bio->bi_private != NULL); +#else + return false; +#endif +} + +static inline int f2fs_sb_has_crypto(struct 
super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); +#else + return 0; +#endif +} + +static inline bool f2fs_may_encrypt(struct inode *inode) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + mode_t mode = inode->i_mode; + + return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); +#else + return 0; +#endif +} + +/* crypto_policy.c */ +int f2fs_is_child_context_consistent_with_parent(struct inode *, + struct inode *); +int f2fs_inherit_context(struct inode *, struct inode *, struct page *); +int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *); +int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); + +/* crypt.c */ +extern struct kmem_cache *f2fs_crypt_info_cachep; +bool f2fs_valid_contents_enc_mode(uint32_t); +uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t); +struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *); +void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *); +struct page *f2fs_encrypt(struct inode *, struct page *); +int f2fs_decrypt(struct f2fs_crypto_ctx *, struct page *); +int f2fs_decrypt_one(struct inode *, struct page *); +void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); + +/* crypto_key.c */ +void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *); +int _f2fs_get_encryption_info(struct inode *inode); + +/* crypto_fname.c */ +bool f2fs_valid_filenames_enc_mode(uint32_t); +u32 f2fs_fname_crypto_round_up(u32, u32); +int f2fs_fname_crypto_alloc_buffer(struct inode *, u32, struct f2fs_str *); +int f2fs_fname_disk_to_usr(struct inode *, f2fs_hash_t *, + const struct f2fs_str *, struct f2fs_str *); +int f2fs_fname_usr_to_disk(struct inode *, const struct qstr *, + struct f2fs_str *); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +void f2fs_restore_and_release_control_page(struct page **); +void f2fs_restore_control_page(struct page *); + +int __init f2fs_init_crypto(void); +int f2fs_crypto_initialize(void); +void 
f2fs_exit_crypto(void); + +int f2fs_has_encryption_key(struct inode *); + +static inline int f2fs_get_encryption_info(struct inode *inode) +{ + struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; + + if (!ci || + (ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD))))) + return _f2fs_get_encryption_info(inode); + return 0; +} + +void f2fs_fname_crypto_free_buffer(struct f2fs_str *); +int f2fs_fname_setup_filename(struct inode *, const struct qstr *, + int lookup, struct f2fs_filename *); +void f2fs_fname_free_filename(struct f2fs_filename *); +#else +static inline void f2fs_restore_and_release_control_page(struct page **p) { } +static inline void f2fs_restore_control_page(struct page *p) { } + +static inline int __init f2fs_init_crypto(void) { return 0; } +static inline void f2fs_exit_crypto(void) { } + +static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } +static inline int f2fs_get_encryption_info(struct inode *i) { return 0; } +static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { } + +static inline int f2fs_fname_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct f2fs_filename *fname) +{ + memset(fname, 0, sizeof(struct f2fs_filename)); + fname->usr_fname = iname; + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; +} + +static inline void f2fs_fname_free_filename(struct f2fs_filename *fname) { } +#endif #endif diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h new file mode 100644 index 00000000..c2c1c2b6 --- /dev/null +++ b/fs/f2fs/f2fs_crypto.h @@ -0,0 +1,151 @@ +/* + * linux/fs/f2fs/f2fs_crypto.h + * + * Copied from linux/fs/ext4/ext4_crypto.h + * + * Copyright (C) 2015, Google, Inc. + * + * This contains encryption header content for f2fs + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. 
+ */ +#ifndef _F2FS_CRYPTO_H +#define _F2FS_CRYPTO_H + +#include <linux/fs.h> + +#define F2FS_KEY_DESCRIPTOR_SIZE 8 + +/* Policy provided via an ioctl on the topmost directory */ +struct f2fs_encryption_policy { + char version; + char contents_encryption_mode; + char filenames_encryption_mode; + char flags; + char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE]; +} __attribute__((__packed__)); + +#define F2FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 +#define F2FS_KEY_DERIVATION_NONCE_SIZE 16 + +#define F2FS_POLICY_FLAGS_PAD_4 0x00 +#define F2FS_POLICY_FLAGS_PAD_8 0x01 +#define F2FS_POLICY_FLAGS_PAD_16 0x02 +#define F2FS_POLICY_FLAGS_PAD_32 0x03 +#define F2FS_POLICY_FLAGS_PAD_MASK 0x03 +#define F2FS_POLICY_FLAGS_VALID 0x03 + +/** + * Encryption context for inode + * + * Protector format: + * 1 byte: Protector format (1 = this version) + * 1 byte: File contents encryption mode + * 1 byte: File names encryption mode + * 1 byte: Flags + * 8 bytes: Master Key descriptor + * 16 bytes: Encryption Key derivation nonce + */ +struct f2fs_encryption_context { + char format; + char contents_encryption_mode; + char filenames_encryption_mode; + char flags; + char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE]; + char nonce[F2FS_KEY_DERIVATION_NONCE_SIZE]; +} __attribute__((__packed__)); + +/* Encryption parameters */ +#define F2FS_XTS_TWEAK_SIZE 16 +#define F2FS_AES_128_ECB_KEY_SIZE 16 +#define F2FS_AES_256_GCM_KEY_SIZE 32 +#define F2FS_AES_256_CBC_KEY_SIZE 32 +#define F2FS_AES_256_CTS_KEY_SIZE 32 +#define F2FS_AES_256_XTS_KEY_SIZE 64 +#define F2FS_MAX_KEY_SIZE 64 + +#define F2FS_KEY_DESC_PREFIX "f2fs:" +#define F2FS_KEY_DESC_PREFIX_SIZE 5 + +struct f2fs_encryption_key { + __u32 mode; + char raw[F2FS_MAX_KEY_SIZE]; + __u32 size; +} __attribute__((__packed__)); + +struct f2fs_crypt_info { + char ci_data_mode; + char ci_filename_mode; + char ci_flags; + struct crypto_ablkcipher *ci_ctfm; + struct key *ci_keyring_key; + char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE]; +}; + +#define 
F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 +#define F2FS_WRITE_PATH_FL 0x00000002 + +struct f2fs_crypto_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + char flags; /* Flags */ +}; + +struct f2fs_completion_result { + struct completion completion; + int res; +}; + +#define DECLARE_F2FS_COMPLETION_RESULT(ecr) \ + struct f2fs_completion_result ecr = { \ + COMPLETION_INITIALIZER((ecr).completion), 0 } + +static inline int f2fs_encryption_key_size(int mode) +{ + switch (mode) { + case F2FS_ENCRYPTION_MODE_AES_256_XTS: + return F2FS_AES_256_XTS_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_GCM: + return F2FS_AES_256_GCM_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_CBC: + return F2FS_AES_256_CBC_KEY_SIZE; + case F2FS_ENCRYPTION_MODE_AES_256_CTS: + return F2FS_AES_256_CTS_KEY_SIZE; + default: + BUG(); + } + return 0; +} + +#define F2FS_FNAME_NUM_SCATTER_ENTRIES 4 +#define F2FS_CRYPTO_BLOCK_SIZE 16 +#define F2FS_FNAME_CRYPTO_DIGEST_SIZE 32 + +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct f2fs_encrypted_symlink_data { + __le16 len; + char encrypted_path[1]; +} __attribute__((__packed__)); + +/** + * This function is used to calculate the disk space required to + * store a filename of length l in encrypted symlink format. 
+ */ +static inline u32 encrypted_symlink_data_len(u32 l) +{ + return (l + sizeof(struct f2fs_encrypted_symlink_data) - 1); +} +#endif /* _F2FS_CRYPTO_H */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8e68bb64..ca79cfd2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -20,12 +20,14 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" #include "segment.h" #include "xattr.h" #include "acl.h" +#include "trace.h" #include static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, @@ -41,18 +43,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, sb_start_pagefault(inode->i_sb); - /* force to convert with normal data indices */ - err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); - if (err) - goto out; + f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, page->index); - f2fs_unlock_op(sbi); - if (err) + if (err) { + f2fs_unlock_op(sbi); goto out; + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); file_update_time(vma->vm_file); lock_page(page); @@ -90,9 +92,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, static const struct vm_operations_struct f2fs_file_vm_ops = { .fault = filemap_fault, - .map_pages = filemap_map_pages, .page_mkwrite = f2fs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int get_parent_ino(struct inode *inode, nid_t *pino) @@ -105,7 +105,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - if (update_dent_inode(inode, &dentry->d_name)) { + if (update_dent_inode(inode, inode, &dentry->d_name)) { dput(dentry); return 0; } @@ -122,6 +122,8 @@ static inline bool need_do_checkpoint(struct inode *inode) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; + else if (file_enc_name(inode) && need_dentry_mark(sbi, inode->i_ino)) + need_cp = true; else if (file_wrong_pino(inode)) need_cp = true; else if 
(!space_for_roll_forward(sbi)) @@ -130,10 +132,45 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) need_cp = true; + else if (test_opt(sbi, FASTBOOT)) + need_cp = true; + else if (sbi->active_logs == 2) + need_cp = true; return need_cp; } +static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + bool ret = false; + /* But we need to avoid that there are some inode updates */ + if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + ret = true; + f2fs_put_page(i, 0); + return ret; +} + +static void try_to_fix_pino(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + nid_t pino; + + down_write(&fi->i_sem); + fi->xattr_ver = 0; + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + fi->i_pino = pino; + file_got_pino(inode); + up_write(&fi->i_sem); + + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + } else { + up_write(&fi->i_sem); + } +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -164,19 +201,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return ret; } + /* if the inode is dirty, let's recover all the time */ + if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { + update_inode_page(inode); + goto go_write; + } + /* * if there is no written data, don't waste time to write recovery info. 
*/ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { - struct page *i = find_get_page(NODE_MAPPING(sbi), ino); - /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { - f2fs_put_page(i, 0); + /* it may call write_inode just prior to fsync */ + if (need_inode_page_update(sbi, ino)) goto go_write; - } - f2fs_put_page(i, 0); if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) @@ -196,51 +235,45 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) up_read(&fi->i_sem); if (need_cp) { - nid_t pino; - /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); - down_write(&fi->i_sem); - F2FS_I(inode)->xattr_ver = 0; - if (file_wrong_pino(inode) && inode->i_nlink == 1 && - get_parent_ino(inode, &pino)) { - F2FS_I(inode)->i_pino = pino; - file_got_pino(inode); - up_write(&fi->i_sem); - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - } else { - up_write(&fi->i_sem); - } - } else { + /* + * We've secured consistency through sync_fs. Following pino + * will be used only for fsynced inodes after checkpoint. 
+ */ + try_to_fix_pino(inode); + clear_inode_flag(fi, FI_APPEND_WRITE); + clear_inode_flag(fi, FI_UPDATE_WRITE); + goto out; + } sync_nodes: - sync_node_pages(sbi, ino, &wbc); + sync_node_pages(sbi, ino, &wbc); - if (need_inode_block_update(sbi, ino)) { - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - goto sync_nodes; - } + /* if cp_error was enabled, we should avoid infinite loop */ + if (unlikely(f2fs_cp_error(sbi))) + goto out; - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + if (need_inode_block_update(sbi, ino)) { + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + goto sync_nodes; + } - /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, ino, APPEND_INO); - clear_inode_flag(fi, FI_APPEND_WRITE); + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; + + /* once recovery info is written, don't need to tack this */ + remove_dirty_inode(sbi, ino, APPEND_INO); + clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, ino, UPDATE_INO); - clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(F2FS_I_SB(inode)); - } + remove_dirty_inode(sbi, ino, UPDATE_INO); + clear_inode_flag(fi, FI_UPDATE_WRITE); + ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + f2fs_trace_ios(NULL, 1); return ret; } @@ -279,6 +312,25 @@ static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, return false; } +static inline int unsigned_offsets(struct file *file) +{ + return file->f_mode & FMODE_UNSIGNED_OFFSET; +} + +static loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) +{ + if (offset < 0 && !unsigned_offsets(file)) + return -EINVAL; + if (offset > maxsize) + return -EINVAL; + + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + return offset; +} + static loff_t f2fs_seek_block(struct file *file, loff_t offset, int 
whence) { struct inode *inode = file->f_mapping->host; @@ -296,7 +348,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; /* handle inline data case */ - if (f2fs_has_inline_data(inode)) { + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { if (whence == SEEK_HOLE) data_ofs = isize; goto found; @@ -327,7 +379,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; dn.ofs_in_node++, pgofs++, - data_ofs = pgofs << PAGE_CACHE_SHIFT) { + data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) { block_t blkaddr; blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); @@ -374,11 +426,38 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct inode *inode = file_inode(file); + + if (f2fs_encrypted_inode(inode)) { + int err = f2fs_get_encryption_info(inode); + if (err) + return 0; + } + + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; return 0; } +static int f2fs_file_open(struct inode *inode, struct file *filp) +{ + int ret = generic_file_open(inode, filp); + + if (!ret && f2fs_encrypted_inode(inode)) { + ret = f2fs_get_encryption_info(inode); + if (ret) + ret = -EACCES; + } + return ret; +} + int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; @@ -394,8 +473,13 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) if (blkaddr == NULL_ADDR) continue; - update_extent_cache(NULL_ADDR, dn); + dn->data_blkaddr = NULL_ADDR; + set_data_blkaddr(dn); + f2fs_update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); + if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) + 
clear_inode_flag(F2FS_I(dn->inode), + FI_FIRST_BLOCK_WRITTEN); nr_free++; } if (nr_free) { @@ -415,32 +499,35 @@ void truncate_data_blocks(struct dnode_of_data *dn) truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } -static void truncate_partial_data_page(struct inode *inode, u64 from) +static int truncate_partial_data_page(struct inode *inode, u64 from, + bool cache_only) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); + pgoff_t index = from >> PAGE_CACHE_SHIFT; + struct address_space *mapping = inode->i_mapping; struct page *page; - if (f2fs_has_inline_data(inode)) - return truncate_inline_data(inode, from); + if (!offset && !cache_only) + return 0; - if (!offset) - return; + if (cache_only) { + page = grab_cache_page(mapping, index); + if (page && PageUptodate(page)) + goto truncate_out; + f2fs_put_page(page, 1); + return 0; + } - page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); + page = get_lock_data_page(inode, index); if (IS_ERR(page)) - return; - - lock_page(page); - if (unlikely(!PageUptodate(page) || - page->mapping != inode->i_mapping)) - goto out; - + return 0; +truncate_out: f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); - set_page_dirty(page); - -out: + if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) + set_page_dirty(page); f2fs_put_page(page, 1); + return 0; } int truncate_blocks(struct inode *inode, u64 from, bool lock) @@ -450,27 +537,36 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; + struct page *ipage; + bool truncate_page = false; trace_f2fs_truncate_blocks_enter(inode, from); - if (f2fs_has_inline_data(inode)) - goto done; - - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); + free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); if (lock) f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); + ipage = get_node_page(sbi, 
inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_inline_data(inode)) { + if (truncate_inline_inode(ipage, from)) + set_page_dirty(ipage); + f2fs_put_page(ipage, 1); + truncate_page = true; + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - if (lock) - f2fs_unlock_op(sbi); - trace_f2fs_truncate_blocks_exit(inode, err); - return err; + goto out; } count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -486,11 +582,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +out: if (lock) f2fs_unlock_op(sbi); -done: + /* lastly zero out the first data page */ - truncate_partial_data_page(inode, from); + if (!err) + err = truncate_partial_data_page(inode, from, truncate_page); trace_f2fs_truncate_blocks_exit(inode, err); return err; @@ -504,6 +602,12 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); + /* we should check inline_data size */ + if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { + if (f2fs_convert_inline_inode(inode)) + return; + } + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); @@ -561,27 +665,27 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { - err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); - if (err) - return err; + if (f2fs_encrypted_inode(inode) && + f2fs_get_encryption_info(inode)) + return -EACCES; - if (attr->ia_size != i_size_read(inode)) { + if (attr->ia_size <= i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); f2fs_balance_fs(F2FS_I_SB(inode)); } else { /* - * giving a chance to truncate blocks past EOF which - * are fallocated with FALLOC_FL_KEEP_SIZE. 
+ * do not trim all blocks after i_size if target size is + * larger than i_size. */ - f2fs_truncate(inode); + truncate_setsize(inode, attr->ia_size); } } __setattr_copy(inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(inode, get_inode_mode(inode)); + err = f2fs_acl_chmod(inode); if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { inode->i_mode = fi->i_acl_mode; clear_inode_flag(fi, FI_ACL_MODE); @@ -596,7 +700,6 @@ const struct inode_operations f2fs_file_inode_operations = { .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, - .set_acl = f2fs_set_acl, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -661,13 +764,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; - /* skip punching hole beyond i_size */ - if (offset >= inode->i_size) - return ret; - - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -706,6 +807,320 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) return ret; } +static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + int ret = 0; + + f2fs_lock_op(sbi); + + for (; end < nrpages; start++, end++) { + block_t new_addr, old_addr; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) { + goto out; + } else if (ret == -ENOENT) { + new_addr = NULL_ADDR; + } else { + new_addr = dn.data_blkaddr; + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + + 
if (new_addr == NULL_ADDR) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) + goto out; + else if (ret == -ENOENT) + continue; + + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_dnode(&dn); + continue; + } else { + truncate_data_blocks_range(&dn, 1); + } + + f2fs_put_dnode(&dn); + } else { + struct page *ipage; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, start); + if (ret) + goto out; + + old_addr = dn.data_blkaddr; + if (old_addr != NEW_ADDR && new_addr == NEW_ADDR) { + dn.data_blkaddr = NULL_ADDR; + f2fs_update_extent_cache(&dn); + invalidate_blocks(sbi, old_addr); + + dn.data_blkaddr = new_addr; + set_data_blkaddr(&dn); + } else if (new_addr != NEW_ADDR) { + struct node_info ni; + + get_node_info(sbi, dn.nid, &ni); + f2fs_replace_block(sbi, &dn, old_addr, new_addr, + ni.version, true); + } + + f2fs_put_dnode(&dn); + } + } + ret = 0; +out: + f2fs_unlock_op(sbi); + return ret; +} + +static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) +{ + pgoff_t pg_start, pg_end; + loff_t new_size; + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (offset + len >= i_size_read(inode)) + return -EINVAL; + + /* collapse range should be aligned to block size of f2fs. 
*/ + if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + pg_start = offset >> PAGE_CACHE_SHIFT; + pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + + /* write out all dirty pages from offset */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); + if (ret) + return ret; + + truncate_pagecache(inode, 0, offset); + + ret = f2fs_do_collapse(inode, pg_start, pg_end); + if (ret) + return ret; + + new_size = i_size_read(inode) - len; + + ret = truncate_blocks(inode, new_size, true); + if (!ret) + i_size_write(inode, new_size); + + return ret; +} + +static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, + int mode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct address_space *mapping = inode->i_mapping; + pgoff_t index, pg_start, pg_end; + loff_t new_size = i_size_read(inode); + loff_t off_start, off_end; + int ret = 0; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) + return ret; + + f2fs_balance_fs(sbi); + + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } + + ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); + if (ret) + return ret; + + truncate_pagecache_range(inode, offset, offset + len - 1); + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + if (pg_start == pg_end) { + fill_zero(inode, pg_start, off_start, off_end - off_start); + if (offset + len > new_size) + new_size = offset + len; + new_size = max_t(loff_t, new_size, offset + len); + } else { + if (off_start) { + fill_zero(inode, pg_start++, off_start, + PAGE_CACHE_SIZE - off_start); + new_size = max_t(loff_t, new_size, + pg_start << PAGE_CACHE_SHIFT); + } + + for (index = pg_start; index < pg_end; index++) 
{ + struct dnode_of_data dn; + struct page *ipage; + + f2fs_lock_op(sbi); + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + f2fs_unlock_op(sbi); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, index); + if (ret) { + f2fs_unlock_op(sbi); + goto out; + } + + if (dn.data_blkaddr != NEW_ADDR) { + invalidate_blocks(sbi, dn.data_blkaddr); + + dn.data_blkaddr = NEW_ADDR; + set_data_blkaddr(&dn); + + dn.data_blkaddr = NULL_ADDR; + f2fs_update_extent_cache(&dn); + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + + new_size = max_t(loff_t, new_size, + (index + 1) << PAGE_CACHE_SHIFT); + } + + if (off_end) { + fill_zero(inode, pg_end, 0, off_end); + new_size = max_t(loff_t, new_size, offset + len); + } + } + +out: + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + return ret; +} + +static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t pg_start, pg_end, delta, nrpages, idx; + loff_t new_size; + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + new_size = i_size_read(inode) + len; + if (new_size > inode->i_sb->s_maxbytes) + return -EFBIG; + + if (offset >= i_size_read(inode)) + return -EINVAL; + + /* insert range should be aligned to block size of f2fs. 
*/ + if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + f2fs_balance_fs(sbi); + + ret = truncate_blocks(inode, i_size_read(inode), true); + if (ret) + return ret; + + /* write out all dirty pages from offset */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); + if (ret) + return ret; + + truncate_pagecache(inode, 0, offset); + + pg_start = offset >> PAGE_CACHE_SHIFT; + pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + delta = pg_end - pg_start; + nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + + for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) { + struct dnode_of_data dn; + struct page *ipage; + block_t new_addr, old_addr; + + f2fs_lock_op(sbi); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, idx, LOOKUP_NODE_RA); + if (ret && ret != -ENOENT) { + goto out; + } else if (ret == -ENOENT) { + goto next; + } else if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_dnode(&dn); + goto next; + } else { + new_addr = dn.data_blkaddr; + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + ret = PTR_ERR(ipage); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + ret = f2fs_reserve_block(&dn, idx + delta); + if (ret) + goto out; + + old_addr = dn.data_blkaddr; + f2fs_bug_on(sbi, old_addr != NEW_ADDR); + + if (new_addr != NEW_ADDR) { + struct node_info ni; + + get_node_info(sbi, dn.nid, &ni); + f2fs_replace_block(sbi, &dn, old_addr, new_addr, + ni.version, true); + } + f2fs_put_dnode(&dn); +next: + f2fs_unlock_op(sbi); + } + + i_size_write(inode, new_size); + return 0; +out: + f2fs_unlock_op(sbi); + return ret; +} + static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { @@ -721,9 +1136,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len, NULL); - if (ret) - 
return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -765,33 +1182,67 @@ static int expand_inode_data(struct inode *inode, loff_t offset, return ret; } +#define FALLOC_FL_COLLAPSE_RANGE 0X08 +#define FALLOC_FL_ZERO_RANGE 0X10 +#define FALLOC_FL_INSERT_RANGE 0X20 + static long f2fs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); - long ret; + long ret = 0; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (f2fs_encrypted_inode(inode) && + (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) + return -EOPNOTSUPP; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | + FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); - if (mode & FALLOC_FL_PUNCH_HOLE) + if (mode & FALLOC_FL_PUNCH_HOLE) { + if (offset >= inode->i_size) + goto out; + ret = punch_hole(inode, offset, len); - else + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + ret = f2fs_collapse_range(inode, offset, len); + } else if (mode & FALLOC_FL_ZERO_RANGE) { + ret = f2fs_zero_range(inode, offset, len, mode); + } else if (mode & FALLOC_FL_INSERT_RANGE) { + ret = f2fs_insert_range(inode, offset, len); + } else { ret = expand_inode_data(inode, offset, len, mode); + } if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); } +out: mutex_unlock(&inode->i_mutex); trace_f2fs_fallocate(inode, mode, offset, len, ret); return ret; } +static int f2fs_release_file(struct inode *inode, struct file *filp) +{ + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode)) + commit_inmem_pages(inode, true); + if (f2fs_is_volatile_file(inode)) { + set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + 
filemap_fdatawrite(inode->i_mapping); + clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + } + return 0; +} + #define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) #define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) @@ -862,19 +1313,28 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) return ret; } +static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + + return put_user(inode->i_generation, (int __user *)arg); +} + static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (!inode_owner_or_capable(inode)) return -EACCES; - f2fs_balance_fs(sbi); + f2fs_balance_fs(F2FS_I_SB(inode)); + + if (f2fs_is_atomic_file(inode)) + return 0; set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); + return f2fs_convert_inline_inode(inode); } static int f2fs_ioc_commit_atomic_write(struct file *filp) @@ -897,6 +1357,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); mnt_drop_write_file(filp); + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); return ret; } @@ -907,7 +1368,89 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (f2fs_is_volatile_file(inode)) + return 0; + set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + + return f2fs_convert_inline_inode(inode); +} + +static int f2fs_ioc_release_volatile_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (!f2fs_is_volatile_file(inode)) + return 0; + + if (!f2fs_is_first_block_written(inode)) + return truncate_partial_data_page(inode, 0, true); + + punch_hole(inode, 0, F2FS_BLKSIZE); + return 0; +} + +static int f2fs_ioc_abort_volatile_write(struct file *filp) +{ + struct inode *inode = 
file_inode(filp); + int ret; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + f2fs_balance_fs(F2FS_I_SB(inode)); + + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + commit_inmem_pages(inode, false); + + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct super_block *sb = sbi->sb; + __u32 in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__u32 __user *)arg)) + return -EFAULT; + + switch (in) { + case FS_GOING_DOWN_FULLSYNC: + sb = freeze_bdev(sb->s_bdev); + if (sb && !IS_ERR(sb)) { + f2fs_stop_checkpoint(sbi); + thaw_bdev(sb->s_bdev, sb); + } + break; + case FS_GOING_DOWN_METASYNC: + /* do checkpoint only */ + f2fs_sync_fs(sb, 1); + f2fs_stop_checkpoint(sbi); + break; + case FS_GOING_DOWN_NOSYNC: + f2fs_stop_checkpoint(sbi); + break; + case FS_GOING_STOP_GC: + stop_gc_thread(sbi); + set_sbi_flag(sbi, SBI_NO_GC); + break; + default: + return -EINVAL; + } return 0; } @@ -941,6 +1484,86 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) return 0; } +static bool uuid_is_nonzero(__u8 u[16]) +{ + int i; + + for (i = 0; i < 16; i++) + if (u[i]) + return true; + return false; +} + +static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_encryption_policy policy; + struct inode *inode = file_inode(filp); + + if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg, + sizeof(policy))) + return -EFAULT; + + return f2fs_process_policy(&policy, inode); +#else + return -EOPNOTSUPP; +#endif +} + +static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) +{ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + struct f2fs_encryption_policy policy; + struct 
inode *inode = file_inode(filp); + int err; + + err = f2fs_get_policy(inode, &policy); + if (err) + return err; + + if (copy_to_user((struct f2fs_encryption_policy __user *)arg, &policy, + sizeof(policy))) + return -EFAULT; + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int err; + + if (!f2fs_sb_has_crypto(inode->i_sb)) + return -EOPNOTSUPP; + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + + err = mnt_want_write_file(filp); + if (err) + return err; + + /* update superblock with uuid */ + generate_random_uuid(sbi->raw_super->encrypt_pw_salt); + + err = f2fs_commit_super(sbi, false); + + mnt_drop_write_file(filp); + if (err) { + /* undo new data */ + memset(sbi->raw_super->encrypt_pw_salt, 0, 16); + return err; + } +got_it: + if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, + 16)) + return -EFAULT; + return 0; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -948,14 +1571,28 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_getflags(filp, arg); case F2FS_IOC_SETFLAGS: return f2fs_ioc_setflags(filp, arg); + case F2FS_IOC_GETVERSION: + return f2fs_ioc_getversion(filp, arg); case F2FS_IOC_START_ATOMIC_WRITE: return f2fs_ioc_start_atomic_write(filp); case F2FS_IOC_COMMIT_ATOMIC_WRITE: return f2fs_ioc_commit_atomic_write(filp); case F2FS_IOC_START_VOLATILE_WRITE: return f2fs_ioc_start_volatile_write(filp); + case F2FS_IOC_RELEASE_VOLATILE_WRITE: + return f2fs_ioc_release_volatile_write(filp); + case F2FS_IOC_ABORT_VOLATILE_WRITE: + return f2fs_ioc_abort_volatile_write(filp); + case FS_IOC_SHUTDOWN: + return f2fs_ioc_shutdown(filp, arg); case FITRIM: return f2fs_ioc_fitrim(filp, arg); + case F2FS_IOC_SET_ENCRYPTION_POLICY: + return 
f2fs_ioc_set_encryption_policy(filp, arg); + case F2FS_IOC_GET_ENCRYPTION_POLICY: + return f2fs_ioc_get_encryption_policy(filp, arg); + case F2FS_IOC_GET_ENCRYPTION_PWSALT: + return f2fs_ioc_get_encryption_pwsalt(filp, arg); default: return -ENOTTY; } @@ -980,11 +1617,12 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, - .read = new_sync_read, - .write = new_sync_write, - .read_iter = generic_file_read_iter, - .write_iter = generic_file_write_iter, - .open = generic_file_open, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .open = f2fs_file_open, + .release = f2fs_release_file, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, @@ -993,5 +1631,5 @@ const struct file_operations f2fs_file_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = generic_file_splice_write, }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2a8f4acd..5429843b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -24,8 +24,6 @@ #include "gc.h" #include -static struct kmem_cache *winode_slab; - static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; @@ -35,18 +33,21 @@ static int gc_thread_func(void *data) wait_ms = gc_th->min_sleep_time; + set_freezable(); do { + + wait_event_interruptible_timeout(*wq, + kthread_should_stop() || freezing(current), + msecs_to_jiffies(wait_ms)); + if (try_to_freeze()) continue; - else - wait_event_interruptible_timeout(*wq, - kthread_should_stop(), - msecs_to_jiffies(wait_ms)); + if (kthread_should_stop()) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); continue; } @@ -67,15 +68,15 @@ static int gc_thread_func(void *data) continue; if 
(!is_idle(sbi)) { - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); continue; } if (has_enough_invalid_blocks(sbi)) - wait_ms = decrease_sleep_time(gc_th, wait_ms); + decrease_sleep_time(gc_th, &wait_ms); else - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); stat_inc_bggc_count(sbi); @@ -96,8 +97,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; int err = 0; - if (!test_opt(sbi, BG_GC)) - goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); if (!gc_th) { err = -ENOMEM; @@ -340,37 +339,39 @@ static const struct victim_selection default_v_ops = { .get_victim = get_victim_by_default, }; -static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) { struct inode_entry *ie; - list_for_each_entry(ie, ilist, list) - if (ie->inode->i_ino == ino) - return ie->inode; + ie = radix_tree_lookup(&gc_list->iroot, ino); + if (ie) + return ie->inode; return NULL; } -static void add_gc_inode(struct inode *inode, struct list_head *ilist) +static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) { struct inode_entry *new_ie; - if (inode == find_gc_inode(inode->i_ino, ilist)) { + if (inode == find_gc_inode(gc_list, inode->i_ino)) { iput(inode); return; } - - new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); + new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); new_ie->inode = inode; - list_add_tail(&new_ie->list, ilist); + + f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); + list_add_tail(&new_ie->list, &gc_list->ilist); } -static void put_gc_inode(struct list_head *ilist) +static void put_gc_inode(struct gc_inode_list *gc_list) { struct inode_entry *ie, *next_ie; - list_for_each_entry_safe(ie, next_ie, ilist, list) { + list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) { + 
radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); - kmem_cache_free(winode_slab, ie); + kmem_cache_free(inode_entry_slab, ie); } } @@ -437,7 +438,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, set_page_dirty(node_page); } f2fs_put_page(node_page, 1); - stat_inc_node_blk_count(sbi, 1); + stat_inc_node_blk_count(sbi, 1, gc_type); } if (initial) { @@ -520,12 +521,79 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return 1; } -static void move_data_page(struct inode *inode, struct page *page, int gc_type) +static void move_encrypted_block(struct inode *inode, block_t bidx) { struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), .type = DATA, - .rw = WRITE_SYNC, + .rw = READ_SYNC, + .encrypted_page = NULL, }; + struct dnode_of_data dn; + struct f2fs_summary sum; + struct node_info ni; + struct page *page; + int err; + + /* do not read out */ + page = grab_cache_page(inode->i_mapping, bidx); + if (!page) + return; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); + if (err) + goto out; + + if (unlikely(dn.data_blkaddr == NULL_ADDR)) + goto put_out; + + get_node_info(fio.sbi, dn.nid, &ni); + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); + + /* read page */ + fio.page = page; + fio.blk_addr = dn.data_blkaddr; + + fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr); + if (!fio.encrypted_page) + goto put_out; + + f2fs_submit_page_bio(&fio); + + /* allocate block address */ + f2fs_wait_on_page_writeback(dn.node_page, NODE); + + allocate_data_block(fio.sbi, NULL, fio.blk_addr, + &fio.blk_addr, &sum, CURSEG_COLD_DATA); + dn.data_blkaddr = fio.blk_addr; + + /* write page */ + lock_page(fio.encrypted_page); + set_page_writeback(fio.encrypted_page); + fio.rw = WRITE_SYNC; + f2fs_submit_page_mbio(&fio); + + set_data_blkaddr(&dn); + f2fs_update_extent_cache(&dn); + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + if 
(page->index == 0) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + + f2fs_put_page(fio.encrypted_page, 1); +put_out: + f2fs_put_dnode(&dn); +out: + f2fs_put_page(page, 1); +} + +static void move_data_page(struct inode *inode, block_t bidx, int gc_type) +{ + struct page *page; + + page = get_lock_data_page(inode, bidx); + if (IS_ERR(page)) + return; if (gc_type == BG_GC) { if (PageWriteback(page)) @@ -533,12 +601,19 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) set_page_dirty(page); set_cold_data(page); } else { + struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(inode), + .type = DATA, + .rw = WRITE_SYNC, + .page = page, + .encrypted_page = NULL, + }; f2fs_wait_on_page_writeback(page, DATA); if (clear_page_dirty_for_io(page)) inode_dec_dirty_pages(inode); set_cold_data(page); - do_write_data_page(page, &fio); + do_write_data_page(&fio); clear_cold_data(page); } out: @@ -553,7 +628,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) * the victim data block is ignored. 
*/ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct list_head *ilist, unsigned int segno, int gc_type) + struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; struct f2fs_summary *entry; @@ -601,31 +676,37 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (IS_ERR(inode) || is_bad_inode(inode)) continue; - start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + /* if encrypted inode, let's go phase 3 */ + if (f2fs_encrypted_inode(inode) && + S_ISREG(inode->i_mode)) { + add_gc_inode(gc_list, inode); + continue; + } - data_page = find_data_page(inode, - start_bidx + ofs_in_node, false); - if (IS_ERR(data_page)) - goto next_iput; + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + data_page = get_read_data_page(inode, + start_bidx + ofs_in_node, READA); + if (IS_ERR(data_page)) { + iput(inode); + continue; + } f2fs_put_page(data_page, 0); - add_gc_inode(inode, ilist); - } else { - inode = find_gc_inode(dni.ino, ilist); - if (inode) { - start_bidx = start_bidx_of_node(nofs, - F2FS_I(inode)); - data_page = get_lock_data_page(inode, - start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); - } + add_gc_inode(gc_list, inode); + continue; + } + + /* phase 3 */ + inode = find_gc_inode(gc_list, dni.ino); + if (inode) { + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)) + + ofs_in_node; + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + move_encrypted_block(inode, start_bidx); + else + move_data_page(inode, start_bidx, gc_type); + stat_inc_data_blk_count(sbi, 1, gc_type); } - continue; -next_iput: - iput(inode); } if (++phase < 4) @@ -646,18 +727,20 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type, int type) + int gc_type) { 
struct sit_info *sit_i = SIT_I(sbi); int ret; + mutex_lock(&sit_i->sentry_lock); - ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, + NO_CHECK_TYPE, LFS); mutex_unlock(&sit_i->sentry_lock); return ret; } static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, - struct list_head *ilist, int gc_type) + struct gc_inode_list *gc_list, int gc_type) { struct page *sum_page; struct f2fs_summary_block *sum; @@ -670,34 +753,44 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, sum = page_address(sum_page); + /* + * this is to avoid deadlock: + * - lock_page(sum_page) - f2fs_replace_block + * - check_valid_map() - mutex_lock(sentry_lock) + * - mutex_lock(sentry_lock) - change_curseg() + * - lock_page(sum_page) + */ + unlock_page(sum_page); + switch (GET_SUM_TYPE((&sum->footer))) { case SUM_TYPE_NODE: gc_node_segment(sbi, sum->entries, segno, gc_type); break; case SUM_TYPE_DATA: - gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); break; } blk_finish_plug(&plug); - stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); + stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type); stat_inc_call_count(sbi->stat_info); - f2fs_put_page(sum_page, 1); + f2fs_put_page(sum_page, 0); } int f2fs_gc(struct f2fs_sb_info *sbi) { - struct list_head ilist; unsigned int segno, i; int gc_type = BG_GC; int nfree = 0; int ret = -1; - struct cp_control cpc = { - .reason = CP_SYNC, + struct cp_control cpc; + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(GFP_NOFS), }; - INIT_LIST_HEAD(&ilist); + cpc.reason = __get_cp_reason(sbi); gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; @@ -709,7 +802,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) write_checkpoint(sbi, &cpc); } - if (!__get_victim(sbi, &segno, gc_type, 
NO_CHECK_TYPE)) + if (!__get_victim(sbi, &segno, gc_type)) goto stop; ret = 0; @@ -719,7 +812,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) META_SSA); for (i = 0; i < sbi->segs_per_sec; i++) - do_garbage_collect(sbi, segno + i, &ilist, gc_type); + do_garbage_collect(sbi, segno + i, &gc_list, gc_type); if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -735,7 +828,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) stop: mutex_unlock(&sbi->gc_mutex); - put_gc_inode(&ilist); + put_gc_inode(&gc_list); return ret; } @@ -743,17 +836,3 @@ void build_gc_manager(struct f2fs_sb_info *sbi) { DIRTY_I(sbi)->v_ops = &default_v_ops; } - -int __init create_gc_caches(void) -{ - winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", - sizeof(struct inode_entry)); - if (!winode_slab) - return -ENOMEM; - return 0; -} - -void destroy_gc_caches(void) -{ - kmem_cache_destroy(winode_slab); -} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 16f0b2b2..b4a65be9 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -35,9 +35,9 @@ struct f2fs_gc_kthread { unsigned int gc_idle; }; -struct inode_entry { - struct list_head list; - struct inode *inode; +struct gc_inode_list { + struct list_head ilist; + struct radix_tree_root iroot; }; /* @@ -64,26 +64,26 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; } -static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th, + long *wait) { - if (wait == gc_th->no_gc_sleep_time) - return wait; + if (*wait == gc_th->no_gc_sleep_time) + return; - wait += gc_th->min_sleep_time; - if (wait > gc_th->max_sleep_time) - wait = gc_th->max_sleep_time; - return wait; + *wait += gc_th->min_sleep_time; + if (*wait > gc_th->max_sleep_time) + *wait = gc_th->max_sleep_time; } -static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +static inline void decrease_sleep_time(struct 
f2fs_gc_kthread *gc_th, + long *wait) { - if (wait == gc_th->no_gc_sleep_time) - wait = gc_th->max_sleep_time; + if (*wait == gc_th->no_gc_sleep_time) + *wait = gc_th->max_sleep_time; - wait -= gc_th->min_sleep_time; - if (wait <= gc_th->min_sleep_time) - wait = gc_th->min_sleep_time; - return wait; + *wait -= gc_th->min_sleep_time; + if (*wait <= gc_th->min_sleep_time) + *wait = gc_th->min_sleep_time; } static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index a844fcfb..71b7206c 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -79,8 +79,7 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) const unsigned char *name = name_info->name; size_t len = name_info->len; - if ((len <= 2) && (name[0] == '.') && - (name[1] == '.' || name[1] == '\0')) + if (is_dot_dotdot(name_info)) return 0; /* Initialize the default seed for the hash checksum functions */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 88036fd7..1661c9c2 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -13,37 +13,75 @@ #include "f2fs.h" -bool f2fs_may_inline(struct inode *inode) +bool f2fs_may_inline_data(struct inode *inode) { - block_t nr_blocks; - loff_t i_size; - if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) return false; if (f2fs_is_atomic_file(inode)) return false; - nr_blocks = F2FS_I(inode)->i_xattr_nid ? 
3 : 2; - if (inode->i_blocks > nr_blocks) + if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) + return false; + + if (i_size_read(inode) > MAX_INLINE_DATA) return false; - i_size = i_size_read(inode); - if (i_size > MAX_INLINE_DATA) + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return false; return true; } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +bool f2fs_may_inline_dentry(struct inode *inode) +{ + if (!test_opt(F2FS_I_SB(inode), INLINE_DENTRY)) + return false; + + if (!S_ISDIR(inode->i_mode)) + return false; + + return true; +} + +void read_inline_data(struct page *page, struct page *ipage) { - struct page *ipage; void *src_addr, *dst_addr; - if (page->index) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - goto out; - } + if (PageUptodate(page)) + return; + + f2fs_bug_on(F2FS_P_SB(page), page->index); + + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + + /* Copy the whole inline data block */ + src_addr = inline_data_addr(ipage); + dst_addr = kmap_atomic(page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + flush_dcache_page(page); + kunmap_atomic(dst_addr); + SetPageUptodate(page); +} + +bool truncate_inline_inode(struct page *ipage, u64 from) +{ + void *addr; + + if (from >= MAX_INLINE_DATA) + return false; + + addr = inline_data_addr(ipage); + + f2fs_wait_on_page_writeback(ipage, NODE); + memset(addr + from, 0, MAX_INLINE_DATA - from); + + return true; +} + +int f2fs_read_inline_data(struct inode *inode, struct page *page) +{ + struct page *ipage; ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { @@ -51,112 +89,119 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) return PTR_ERR(ipage); } - zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + if (!f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + return -EAGAIN; + } - /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); - 
memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); - f2fs_put_page(ipage, 1); + if (page->index) + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + else + read_inline_data(page, ipage); -out: SetPageUptodate(page); + f2fs_put_page(ipage, 1); unlock_page(page); - return 0; } -static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) +int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { - int err = 0; - struct page *ipage; - struct dnode_of_data dn; void *src_addr, *dst_addr; - block_t new_blk_addr; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_io_info fio = { + .sbi = F2FS_I_SB(dn->inode), .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, + .page = page, + .encrypted_page = NULL, }; + int dirty, err; - f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto out; - } + f2fs_bug_on(F2FS_I_SB(dn->inode), page->index); - /* someone else converted inline_data already */ - if (!f2fs_has_inline_data(inode)) - goto out; + if (!f2fs_exist_data(dn->inode)) + goto clear_out; - /* - * i_addr[0] is not used for inline data, - * so reserving new block will not destroy inline data - */ - set_new_dnode(&dn, inode, ipage, NULL, 0); - err = f2fs_reserve_block(&dn, 0); + err = f2fs_reserve_block(dn, 0); if (err) - goto out; + return err; f2fs_wait_on_page_writeback(page, DATA); + + if (PageUptodate(page)) + goto no_update; + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); + src_addr = inline_data_addr(dn->inode_page); + dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); + flush_dcache_page(page); + kunmap_atomic(dst_addr); SetPageUptodate(page); +no_update: + /* clear dirty state */ + dirty = clear_page_dirty_for_io(page); /* write data page to try to make data consistent */ set_page_writeback(page); - 
write_data_page(page, &dn, &new_blk_addr, &fio); - update_extent_cache(new_blk_addr, &dn); + fio.blk_addr = dn->data_blkaddr; + write_data_page(dn, &fio); + set_data_blkaddr(dn); + f2fs_update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); + if (dirty) + inode_dec_dirty_pages(dn->inode); - /* clear inline data and flag after data writeback */ - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_dec_inline_inode(inode); + /* this converted inline_data should be recovered. */ + set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); - sync_inode_page(&dn); - f2fs_put_dnode(&dn); -out: - f2fs_unlock_op(sbi); - return err; + /* clear inline data and flag after data writeback */ + truncate_inline_inode(dn->inode_page, 0); +clear_out: + stat_dec_inline_inode(dn->inode); + f2fs_clear_inline_inode(dn->inode); + sync_inode_page(dn); + f2fs_put_dnode(dn); + return 0; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, - struct page *page) +int f2fs_convert_inline_inode(struct inode *inode) { - struct page *new_page = page; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + struct page *ipage, *page; + int err = 0; - if (!f2fs_has_inline_data(inode)) - return 0; - else if (to_size <= MAX_INLINE_DATA) - return 0; + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; + + f2fs_lock_op(sbi); - if (!page || page->index != 0) { - new_page = grab_cache_page(inode->i_mapping, 0); - if (!new_page) - return -ENOMEM; + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; } - err = __f2fs_convert_inline_data(inode, new_page); - if (!page || page->index != 0) - f2fs_put_page(new_page, 1); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) + err = f2fs_convert_inline_page(&dn, page); + + f2fs_put_dnode(&dn); +out: + 
f2fs_unlock_op(sbi); + + f2fs_put_page(page, 1); return err; } -int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) +int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; - struct page *ipage; struct dnode_of_data dn; int err; @@ -164,49 +209,28 @@ int f2fs_write_inline_data(struct inode *inode, err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; - ipage = dn.inode_page; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - src_addr = kmap(page); - dst_addr = inline_data_addr(ipage); - memcpy(dst_addr, src_addr, size); - kunmap(page); - - /* Release the first data block if it is allocated */ if (!f2fs_has_inline_data(inode)) { - truncate_data_blocks_range(&dn, 1); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_inc_inline_inode(inode); + f2fs_put_dnode(&dn); + return -EAGAIN; } + f2fs_bug_on(F2FS_I_SB(inode), page->index); + + f2fs_wait_on_page_writeback(dn.inode_page, NODE); + src_addr = kmap_atomic(page); + dst_addr = inline_data_addr(dn.inode_page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + kunmap_atomic(src_addr); + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); f2fs_put_dnode(&dn); - return 0; } -void truncate_inline_data(struct inode *inode, u64 from) -{ - struct page *ipage; - - if (from >= MAX_INLINE_DATA) - return; - - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return; - - f2fs_wait_on_page_writeback(ipage, NODE); - - zero_user_segment(ipage, INLINE_DATA_OFFSET + from, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); -} - bool recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -236,6 +260,10 @@ bool recover_inline_data(struct inode *inode, struct page *npage) src_addr 
= inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + update_inode(inode, ipage); f2fs_put_page(ipage, 1); return true; @@ -244,16 +272,287 @@ bool recover_inline_data(struct inode *inode, struct page *npage) if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + truncate_inline_inode(ipage, 0); + f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { truncate_blocks(inode, 0, false); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } return false; } + +struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, + struct f2fs_filename *fname, struct page **res_page, + unsigned int flags) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_inline_dentry *inline_dentry; + struct qstr name = FSTR_TO_QSTR(&fname->disk_name); + struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; + struct page *ipage; + f2fs_hash_t namehash; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + namehash = f2fs_dentry_hash(&name); + + inline_dentry = inline_data_addr(ipage); + + make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); + de = find_target_dentry(fname, namehash, NULL, &d, flags); + unlock_page(ipage); + if (de) + *res_page = ipage; + else + f2fs_put_page(ipage, 0); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. 
+ */ + f2fs_bug_on(sbi, d.max < 0); + return de; +} + +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir, + struct page **p) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct f2fs_dir_entry *de; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + dentry_blk = inline_data_addr(ipage); + de = &dentry_blk->dentry[1]; + *p = ipage; + unlock_page(ipage); + return de; +} + +int make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct page *ipage) +{ + struct f2fs_inline_dentry *dentry_blk; + struct f2fs_dentry_ptr d; + + dentry_blk = inline_data_addr(ipage); + + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); + do_make_empty_dir(inode, parent, &d); + + set_page_dirty(ipage); + + /* update i_size to MAX_INLINE_DATA */ + if (i_size_read(inode) < MAX_INLINE_DATA) { + i_size_write(inode, MAX_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + } + return 0; +} + +static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, + struct f2fs_inline_dentry *inline_dentry) +{ + struct page *page; + struct dnode_of_data dn; + struct f2fs_dentry_block *dentry_blk; + int err; + + page = grab_cache_page(dir->i_mapping, 0); + if (!page) + return -ENOMEM; + + set_new_dnode(&dn, dir, ipage, NULL, 0); + err = f2fs_reserve_block(&dn, 0); + if (err) + goto out; + + f2fs_wait_on_page_writeback(page, DATA); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + + dentry_blk = kmap_atomic(page); + + /* copy data from inline dentry block to new dentry block */ + memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, + INLINE_DENTRY_BITMAP_SIZE); + memcpy(dentry_blk->dentry, inline_dentry->dentry, + sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); + memcpy(dentry_blk->filename, inline_dentry->filename, + NR_INLINE_DENTRY * F2FS_SLOT_LEN); + + kunmap_atomic(dentry_blk); + SetPageUptodate(page); + set_page_dirty(page); + + /* clear 
inline dir and flag after data writeback */ + truncate_inline_inode(ipage, 0); + + stat_dec_inline_dir(dir); + clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); + + if (i_size_read(dir) < PAGE_CACHE_SIZE) { + i_size_write(dir, PAGE_CACHE_SIZE); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + sync_inode_page(&dn); +out: + f2fs_put_page(page, 1); + return err; +} + +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, + struct inode *inode, nid_t ino, umode_t mode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos; + f2fs_hash_t name_hash; + size_t namelen = name->len; + struct f2fs_inline_dentry *dentry_blk = NULL; + struct f2fs_dentry_ptr d; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page = NULL; + int err = 0; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + dentry_blk = inline_data_addr(ipage); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_INLINE_DENTRY); + if (bit_pos >= NR_INLINE_DENTRY) { + err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + if (!err) + err = -EAGAIN; + goto out; + } + + if (inode) { + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + } + + f2fs_wait_on_page_writeback(ipage, NODE); + + name_hash = f2fs_dentry_hash(name); + make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); + f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); + + set_page_dirty(ipage); + + /* we don't need to mark_inode_dirty now */ + if (inode) { + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + } + + update_parent_metadata(dir, inode, 0); +fail: + if (inode) + up_write(&F2FS_I(inode)->i_sem); + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { + update_inode(dir, ipage); + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } +out: + f2fs_put_page(ipage, 1); + return 
err; +} + +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode) +{ + struct f2fs_inline_dentry *inline_dentry; + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + unsigned int bit_pos; + int i; + + lock_page(page); + f2fs_wait_on_page_writeback(page, NODE); + + inline_dentry = inline_data_addr(page); + bit_pos = dentry - inline_dentry->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, + &inline_dentry->dentry_bitmap); + + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode) + f2fs_drop_nlink(dir, inode, page); + + f2fs_put_page(page, 1); +} + +bool f2fs_empty_inline_dir(struct inode *dir) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos = 2; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return false; + + dentry_blk = inline_data_addr(ipage); + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + bit_pos); + + f2fs_put_page(ipage, 1); + + if (bit_pos < NR_INLINE_DENTRY) + return false; + + return true; +} + +int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir, + struct f2fs_str *fstr) +{ + unsigned long pos = file->f_pos; + unsigned int bit_pos = 0; + struct inode *inode = file_inode(file); + struct f2fs_inline_dentry *inline_dentry = NULL; + struct page *ipage = NULL; + struct f2fs_dentry_ptr d; + + if (pos >= NR_INLINE_DENTRY) + return 0; + + bit_pos = (pos % NR_INLINE_DENTRY); + + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + inline_dentry = inline_data_addr(ipage); + + make_dentry_ptr(inode, &d, (void *)inline_dentry, 2); + + if (!f2fs_fill_dentries(file, dirent, filldir, &d, 0, bit_pos, fstr)) + file->f_pos = NR_INLINE_DENTRY; + + f2fs_put_page(ipage, 1); + return 0; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c 
index 0deead45..f8e6cf6d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -12,7 +12,6 @@ #include #include #include -#include #include "f2fs.h" #include "node.h" @@ -22,20 +21,20 @@ void f2fs_set_inode_flags(struct inode *inode) { unsigned int flags = F2FS_I(inode)->i_flags; - unsigned int new_fl = 0; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | + S_NOATIME | S_DIRSYNC); if (flags & FS_SYNC_FL) - new_fl |= S_SYNC; + inode->i_flags |= S_SYNC; if (flags & FS_APPEND_FL) - new_fl |= S_APPEND; + inode->i_flags |= S_APPEND; if (flags & FS_IMMUTABLE_FL) - new_fl |= S_IMMUTABLE; + inode->i_flags |= S_IMMUTABLE; if (flags & FS_NOATIME_FL) - new_fl |= S_NOATIME; + inode->i_flags |= S_NOATIME; if (flags & FS_DIRSYNC_FL) - new_fl |= S_DIRSYNC; - set_mask_bits(&inode->i_flags, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); + inode->i_flags |= S_DIRSYNC; } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -51,6 +50,15 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static bool __written_first_block(struct f2fs_inode *ri) +{ + block_t addr = le32_to_cpu(ri->i_addr[0]); + + if (addr != NEW_ADDR && addr != NULL_ADDR) + return true; + return false; +} + static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { @@ -67,6 +75,25 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static void __recover_inline_status(struct inode *inode, struct page *ipage) +{ + void *inline_data = inline_data_addr(ipage); + __le32 *start = inline_data; + __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32); + + while (start < end) { + if (*start++) { + f2fs_wait_on_page_writeback(ipage, NODE); + + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage)); + set_page_dirty(ipage); + return; + } + } + return; +} + static int do_read_inode(struct inode *inode) { struct 
f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -111,13 +138,25 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - get_extent_info(&fi->ext, ri->i_ext); + f2fs_init_extent_cache(inode, &ri->i_ext); + get_inline_info(fi, ri); + /* check data exist */ + if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) + __recover_inline_status(inode, node_page); + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); + if (__written_first_block(ri)) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + f2fs_put_page(node_page, 1); + + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + return 0; } @@ -156,9 +195,12 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &f2fs_symlink_inode_operations; + if (f2fs_encrypted_inode(inode)) + inode->i_op = &f2fs_encrypted_symlink_inode_operations; + else + inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { @@ -193,7 +235,11 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_links = cpu_to_le32(inode->i_nlink); ri->i_size = cpu_to_le64(i_size_read(inode)); ri->i_blocks = cpu_to_le64(inode->i_blocks); + + read_lock(&F2FS_I(inode)->ext_lock); set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + read_unlock(&F2FS_I(inode)->ext_lock); + set_raw_inline(F2FS_I(inode), ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); @@ -270,11 +316,11 @@ void f2fs_evict_inode(struct inode *inode) nid_t xnid = F2FS_I(inode)->i_xattr_nid; /* some remained atomic pages should 
discarded */ - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + if (f2fs_is_atomic_file(inode)) commit_inmem_pages(inode, true); trace_f2fs_evict_inode(inode); - truncate_inode_pages_final(&inode->i_data); + truncate_inode_pages(&inode->i_data, 0); if (inode->i_ino == F2FS_NODE_INO(sbi) || inode->i_ino == F2FS_META_INO(sbi)) @@ -295,11 +341,18 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); remove_inode_page(inode); - stat_dec_inline_inode(inode); f2fs_unlock_op(sbi); sb_end_intwrite(inode->i_sb); no_delete: + stat_dec_inline_dir(inode); + stat_dec_inline_inode(inode); + + /* update extent info in inode */ + if (inode->i_nlink) + f2fs_preserve_extent_tree(inode); + f2fs_destroy_extent_tree(inode); + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); @@ -308,6 +361,10 @@ void f2fs_evict_inode(struct inode *inode) if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); out_clear: +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_I(inode)->i_crypt_info) + f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info); +#endif clear_inode(inode); } @@ -325,8 +382,9 @@ void handle_failed_inode(struct inode *inode) f2fs_truncate(inode); remove_inode_page(inode); - stat_dec_inline_inode(inode); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d2526e5..c4fc9b32 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -9,11 +9,13 @@ * published by the Free Software Foundation. 
*/ #include +#include #include #include #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -21,6 +23,80 @@ #include "acl.h" #include +#ifdef CONFIG_F2FS_EMULATED_SD +/* dcache dops */ +static unsigned int __f2fs_striptail_len(unsigned int len, const char *name) +{ + while (len && name[len - 1] == '.') + len--; + return len; +} + +static unsigned int f2fs_striptail_len(const struct qstr *qstr) +{ + return __f2fs_striptail_len(qstr->len, qstr->name); +} + +static int f2fs_d_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) +{ + const unsigned char *name; + unsigned int len; + unsigned long hash; + + name = qstr->name; + len = f2fs_striptail_len(qstr); + + hash = init_name_hash(); + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + qstr->hash = end_name_hash(hash); + + return 0; +} + +static int f2fs_d_compare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) +{ + unsigned int alen, blen; + + /* A filename cannot end in '.' 
or we treat it like it has none */ + alen = f2fs_striptail_len(name); + blen = __f2fs_striptail_len(len, str); + if (alen == blen) { + if (strncasecmp(name->name, str, alen) == 0) + return 0; + } + return 1; +} + +const struct dentry_operations f2fs_dops = { + .d_hash = f2fs_d_hash, + .d_compare = f2fs_d_compare, +}; + +void f2fs_set_nocase_dop(struct inode *inode) +{ + struct dentry *dentry; + + /* only dir can be set */ + if (!S_ISDIR(inode->i_mode)) + return; + + /* dir inode have one alias at most */ + dentry = d_find_alias(inode); + + if (dentry) { + if (!dentry->d_op) { + shrink_dcache_parent(dentry); + d_set_d_op(dentry, &f2fs_dops); + } + dput(dentry); + } +} +#endif + static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -54,6 +130,19 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; goto out; } + + /* If the directory encrypted, then we should encrypt the inode. */ + if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) + f2fs_set_encrypted_inode(inode); + + if (f2fs_may_inline_data(inode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + if (f2fs_may_inline_dentry(inode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -131,6 +220,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -144,6 +236,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; + if (f2fs_encrypted_inode(dir) && + !f2fs_is_child_context_consistent_with_parent(dir, inode)) + return -EPERM; + f2fs_balance_fs(sbi); inode->i_ctime = CURRENT_TIME; @@ -157,6 +253,9 @@ static int 
f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_unlock_op(sbi); d_instantiate(dentry, inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); @@ -174,30 +273,96 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); } +static int __recover_dot_dentries(struct inode *dir, nid_t pino) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct qstr dot = {.len = 1, .name = "."}; + struct qstr dotdot = {.len = 2, .name = ".."}; + struct f2fs_dir_entry *de; + struct page *page; + int err = 0; + + f2fs_lock_op(sbi); + + de = f2fs_find_entry(dir, &dot, &page, 0); + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + } else { + err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); + if (err) + goto out; + } + + de = f2fs_find_entry(dir, &dotdot, &page, 0); + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + } else { + err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); + } +out: + if (!err) { + clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS); + mark_inode_dirty(dir); + } + + f2fs_unlock_op(sbi); + return err; +} + static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; struct f2fs_dir_entry *de; struct page *page; + nid_t ino; + int err = 0; if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - de = f2fs_find_entry(dir, &dentry->d_name, &page); - if (de) { - nid_t ino = le32_to_cpu(de->ino); - kunmap(page); - f2fs_put_page(page, 0); +#ifdef CONFIG_F2FS_EMULATED_SD + if (!dentry->d_op && dentry->d_parent && dentry->d_parent->d_op) + d_set_d_op(dentry, dentry->d_parent->d_op); - inode = f2fs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (dentry->d_op) + flags |= LOOKUP_NOCASE; +#endif + + de = f2fs_find_entry(dir, &dentry->d_name, &page, flags); + if (!de) + return 
d_splice_alias(inode, dentry); - stat_inc_inline_inode(inode); + ino = le32_to_cpu(de->ino); + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + + inode = f2fs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + if (f2fs_has_inline_dots(inode)) { + err = __recover_dot_dentries(inode, dir->i_ino); + if (err) + goto err_out; } +#ifdef CONFIG_F2FS_EMULATED_SD + if (S_ISDIR(inode->i_mode) && !dentry->d_op) { + err = f2fs_getxattr(inode, F2FS_XATTR_INDEX_USER, + F2FS_XATTR_DIR_NOCASE, NULL, 0, NULL); + if (err > 0) + d_set_d_op(dentry, &f2fs_dops); + } +#endif + return d_splice_alias(inode, dentry); + +err_out: + iget_failed(inode); + return ERR_PTR(err); } static int f2fs_unlink(struct inode *dir, struct dentry *dentry) @@ -211,7 +376,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); f2fs_balance_fs(sbi); - de = f2fs_find_entry(dir, &dentry->d_name, &page); + de = f2fs_find_entry(dir, &dentry->d_name, &page, 0); if (!de) goto fail; @@ -219,35 +384,63 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } - f2fs_delete_entry(de, page, inode); + f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); fail: trace_f2fs_unlink_exit(inode, err); return err; } +static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct page *page = page_follow_link_light(dentry, nd); + + if (IS_ERR_OR_NULL(page)) + return page; + + /* this is broken symlink case */ + if (*nd_get_link(nd) == 0) { + page_put_link(dentry, nd, page); + return ERR_PTR(-ENOENT); + } + return page; +} + static int f2fs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct 
f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; - size_t symlen = strlen(symname) + 1; + size_t len = strlen(symname); + size_t p_len; + char *p_str; + struct f2fs_str disk_link = FSTR_INIT(NULL, 0); + struct f2fs_encrypted_symlink_data *sd = NULL; int err; + if (len > dir->i_sb->s_blocksize) + return -ENAMETOOLONG; + f2fs_balance_fs(sbi); inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &f2fs_symlink_inode_operations; + if (f2fs_encrypted_inode(inode)) + inode->i_op = &f2fs_encrypted_symlink_inode_operations; + else + inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; f2fs_lock_op(sbi); @@ -255,12 +448,66 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (err) goto out; f2fs_unlock_op(sbi); - - err = page_symlink(inode, symname, symlen); alloc_nid_done(sbi, inode->i_ino); + if (f2fs_encrypted_inode(dir)) { + struct qstr istr = QSTR_INIT(symname, len); + + err = f2fs_get_encryption_info(inode); + if (err) + goto err_out; + + err = f2fs_fname_crypto_alloc_buffer(inode, len, &disk_link); + if (err) + goto err_out; + + err = f2fs_fname_usr_to_disk(inode, &istr, &disk_link); + if (err < 0) + goto err_out; + + p_len = encrypted_symlink_data_len(disk_link.len) + 1; + + if (p_len > dir->i_sb->s_blocksize) { + err = -ENAMETOOLONG; + goto err_out; + } + + sd = kzalloc(p_len, GFP_NOFS); + if (!sd) { + err = -ENOMEM; + goto err_out; + } + memcpy(sd->encrypted_path, disk_link.name, disk_link.len); + sd->len = cpu_to_le16(disk_link.len); + p_str = (char *)sd; + } else { + p_len = len + 1; + p_str = (char *)symname; + } + + err = page_symlink(inode, p_str, p_len); + +err_out: d_instantiate(dentry, inode); unlock_new_inode(inode); + + /* + * Let's flush symlink data in order to avoid broken symlink as much as + * possible. Nevertheless, fsyncing is the best way, but there is no + * way to get a file descriptor in order to flush that. 
+ * + * Note that, it needs to do dir->fsync to make this recoverable. + * If the symlink path is stored into inline_data, there is no + * performance regression. + */ + if (!err) + filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); + + kfree(sd); + f2fs_fname_crypto_free_buffer(&disk_link); return err; out: handle_failed_inode(inode); @@ -282,7 +529,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); @@ -296,6 +543,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) d_instantiate(dentry, inode); unlock_new_inode(inode); + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_fail: @@ -338,8 +587,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -359,9 +612,16 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *new_entry; int err = -ENOENT; + if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && + !f2fs_is_child_context_consistent_with_parent(new_dir, + old_inode)) { + err = -EPERM; + goto out; + } + f2fs_balance_fs(sbi); - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page, 0); if (!old_entry) goto out; @@ -380,7 +640,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = -ENOENT; new_entry = f2fs_find_entry(new_dir, 
&new_dentry->d_name, - &new_page); + &new_page, 0); if (!new_entry) goto out_dir; @@ -390,7 +650,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto put_out_dir; - if (update_dent_inode(old_inode, &new_dentry->d_name)) { + if (update_dent_inode(old_inode, new_inode, + &new_dentry->d_name)) { release_orphan_inode(sbi); goto put_out_dir; } @@ -430,12 +691,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, down_write(&F2FS_I(old_inode)->i_sem); file_lost_pino(old_inode); + if (new_inode && file_enc_name(new_inode)) + file_set_enc_name(old_inode); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); - f2fs_delete_entry(old_entry, old_page, NULL); + f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (old_dir_entry) { if (old_dir != new_dir) { @@ -443,7 +706,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -452,227 +715,112 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; put_out_dir: f2fs_unlock_op(sbi); - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_old: - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; } -static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +#ifdef CONFIG_F2FS_FS_ENCRYPTION +static void *f2fs_encrypted_follow_link(struct dentry *dentry, + struct nameidata *nd) { - 
struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); - struct inode *old_inode = old_dentry->d_inode; - struct inode *new_inode = new_dentry->d_inode; - struct page *old_dir_page, *new_dir_page; - struct page *old_page, *new_page; - struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; - struct f2fs_dir_entry *old_entry, *new_entry; - int old_nlink = 0, new_nlink = 0; - int err = -ENOENT; - - f2fs_balance_fs(sbi); - - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_entry) - goto out; - - new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); - if (!new_entry) - goto out_old; - - /* prepare for updating ".." directory entry info later */ - if (old_dir != new_dir) { - if (S_ISDIR(old_inode->i_mode)) { - err = -EIO; - old_dir_entry = f2fs_parent_dir(old_inode, - &old_dir_page); - if (!old_dir_entry) - goto out_new; - } - - if (S_ISDIR(new_inode->i_mode)) { - err = -EIO; - new_dir_entry = f2fs_parent_dir(new_inode, - &new_dir_page); - if (!new_dir_entry) - goto out_old_dir; - } - } - - /* - * If cross rename between file and directory those are not - * in the same directory, we will inc nlink of file's parent - * later, so we should check upper boundary of its nlink. - */ - if ((!old_dir_entry || !new_dir_entry) && - old_dir_entry != new_dir_entry) { - old_nlink = old_dir_entry ? -1 : 1; - new_nlink = -old_nlink; - err = -EMLINK; - if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || - (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) - goto out_new_dir; - } - - f2fs_lock_op(sbi); - - err = update_dent_inode(old_inode, &new_dentry->d_name); - if (err) - goto out_unlock; - - err = update_dent_inode(new_inode, &old_dentry->d_name); - if (err) - goto out_undo; - - /* update ".." directory entry info of old dentry */ - if (old_dir_entry) - f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); - - /* update ".." 
directory entry info of new dentry */ - if (new_dir_entry) - f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir); - - /* update directory entry info of old dir inode */ - f2fs_set_link(old_dir, old_entry, old_page, new_inode); - - down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); - up_write(&F2FS_I(old_inode)->i_sem); - - update_inode_page(old_inode); - - old_dir->i_ctime = CURRENT_TIME; - if (old_nlink) { - down_write(&F2FS_I(old_dir)->i_sem); - if (old_nlink < 0) - drop_nlink(old_dir); - else - inc_nlink(old_dir); - up_write(&F2FS_I(old_dir)->i_sem); - } - mark_inode_dirty(old_dir); - update_inode_page(old_dir); - - /* update directory entry info of new dir inode */ - f2fs_set_link(new_dir, new_entry, new_page, old_inode); - - down_write(&F2FS_I(new_inode)->i_sem); - file_lost_pino(new_inode); - up_write(&F2FS_I(new_inode)->i_sem); - - update_inode_page(new_inode); - - new_dir->i_ctime = CURRENT_TIME; - if (new_nlink) { - down_write(&F2FS_I(new_dir)->i_sem); - if (new_nlink < 0) - drop_nlink(new_dir); - else - inc_nlink(new_dir); - up_write(&F2FS_I(new_dir)->i_sem); + struct page *cpage = NULL; + char *caddr, *paddr = NULL; + struct f2fs_str cstr; + struct f2fs_str pstr = FSTR_INIT(NULL, 0); + struct inode *inode = dentry->d_inode; + struct f2fs_encrypted_symlink_data *sd; + loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); + u32 max_size = inode->i_sb->s_blocksize; + int res; + + res = f2fs_get_encryption_info(inode); + if (res) + return ERR_PTR(res); + + cpage = read_mapping_page(inode->i_mapping, 0, NULL); + if (IS_ERR(cpage)) + return cpage; + caddr = kmap(cpage); + caddr[size] = 0; + + /* Symlink is encrypted */ + sd = (struct f2fs_encrypted_symlink_data *)caddr; + cstr.name = sd->encrypted_path; + cstr.len = le16_to_cpu(sd->len); + + /* this is broken symlink case */ + if (cstr.name[0] == 0 && cstr.len == 0) { + res = -ENOENT; + goto errout; } - mark_inode_dirty(new_dir); - update_inode_page(new_dir); - 
f2fs_unlock_op(sbi); - return 0; -out_undo: - /* Still we may fail to recover name info of f2fs_inode here */ - update_dent_inode(old_inode, &old_dentry->d_name); -out_unlock: - f2fs_unlock_op(sbi); -out_new_dir: - if (new_dir_entry) { - kunmap(new_dir_page); - f2fs_put_page(new_dir_page, 0); + if ((cstr.len + sizeof(struct f2fs_encrypted_symlink_data) - 1) > + max_size) { + /* Symlink data on the disk is corrupted */ + res = -EIO; + goto errout; } -out_old_dir: - if (old_dir_entry) { - kunmap(old_dir_page); - f2fs_put_page(old_dir_page, 0); - } -out_new: - kunmap(new_page); - f2fs_put_page(new_page, 0); -out_old: - kunmap(old_page); - f2fs_put_page(old_page, 0); -out: - return err; + res = f2fs_fname_crypto_alloc_buffer(inode, cstr.len, &pstr); + if (res) + goto errout; + + res = f2fs_fname_disk_to_usr(inode, NULL, &cstr, &pstr); + if (res < 0) + goto errout; + + paddr = pstr.name; + + /* Null-terminate the name */ + paddr[res] = '\0'; + nd_set_link(nd, paddr); + + kunmap(cpage); + page_cache_release(cpage); + return NULL; +errout: + f2fs_fname_crypto_free_buffer(&pstr); + kunmap(cpage); + page_cache_release(cpage); + return ERR_PTR(res); } -static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) +void kfree_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) { - if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) - return -EINVAL; - - if (flags & RENAME_EXCHANGE) { - return f2fs_cross_rename(old_dir, old_dentry, - new_dir, new_dentry); - } - /* - * VFS has already handled the new dentry existence case, - * here, we just deal with "RENAME_NOREPLACE" as regular rename. 
- */ - return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry); + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); } -static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct inode *inode; - int err; - - inode = f2fs_new_inode(dir, mode); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - inode->i_op = &f2fs_file_inode_operations; - inode->i_fop = &f2fs_file_operations; - inode->i_mapping->a_ops = &f2fs_dblock_aops; - - f2fs_lock_op(sbi); - err = acquire_orphan_inode(sbi); - if (err) - goto out; - - err = f2fs_do_tmpfile(inode, dir); - if (err) - goto release_out; - - /* - * add this non-linked tmpfile to orphan list, in this way we could - * remove all unused data of tmpfile after abnormal power-off. - */ - add_orphan_inode(sbi, inode->i_ino); - f2fs_unlock_op(sbi); - - alloc_nid_done(sbi, inode->i_ino); - d_tmpfile(dentry, inode); - unlock_new_inode(inode); - return 0; - -release_out: - release_orphan_inode(sbi); -out: - handle_failed_inode(inode); - return err; -} +const struct inode_operations f2fs_encrypted_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = f2fs_encrypted_follow_link, + .put_link = kfree_put_link, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +}; +#endif const struct inode_operations f2fs_dir_inode_operations = { .create = f2fs_create, @@ -683,12 +831,10 @@ const struct inode_operations f2fs_dir_inode_operations = { .mkdir = f2fs_mkdir, .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, - .rename2 = f2fs_rename2, - .tmpfile = f2fs_tmpfile, + .rename = f2fs_rename, .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, - .set_acl = f2fs_set_acl, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -699,7 +845,7 @@ const struct 
inode_operations f2fs_dir_inode_operations = { const struct inode_operations f2fs_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = page_follow_link_light, + .follow_link = f2fs_follow_link, .put_link = page_put_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, @@ -715,7 +861,6 @@ const struct inode_operations f2fs_special_inode_operations = { .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, - .set_acl = f2fs_set_acl, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 44b8afef..8abeb450 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -19,6 +19,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) @@ -31,22 +32,46 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; + unsigned long avail_ram; unsigned long mem_size = 0; bool res = false; si_meminfo(&val); - /* give 25%, 25%, 50% memory for each components respectively */ + + /* only uses low memory */ + avail_ram = val.totalram - val.totalhigh; + + /* + * give 25%, 25%, 50%, 50%, 50% memory for each components respectively + */ if (type == FREE_NIDS) { - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->dirty_exceeded) return 
false; mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INO_ENTRIES) { + int i; + + for (i = 0; i <= UPDATE_INO; i++) + mem_size += (sbi->im[i].ino_num * + sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == EXTENT_CACHE) { + mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) + + atomic_read(&sbi->total_ext_node) * + sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else { + if (sbi->sb->s_bdi->dirty_exceeded) + return false; } return res; } @@ -131,7 +156,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) return; -retry: + head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); @@ -140,11 +165,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, INIT_LIST_HEAD(&head->set_list); head->set = set; head->entry_cnt = 0; - - if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { - cond_resched(); - goto retry; - } + f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } list_move_tail(&ne->list, &head->entry_list); nm_i->dirty_nat_cnt++; @@ -155,7 +176,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry *ne) { - nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; + nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; head = radix_tree_lookup(&nm_i->nat_set_root, set); @@ -174,32 +195,35 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); 
struct nat_entry *e; - bool is_cp = true; + bool need = false; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); - if (e && !get_nat_flag(e, IS_CHECKPOINTED)) - is_cp = false; - read_unlock(&nm_i->nat_tree_lock); - return is_cp; + if (e) { + if (!get_nat_flag(e, IS_CHECKPOINTED) && + !get_nat_flag(e, HAS_FSYNCED_INODE)) + need = true; + } + up_read(&nm_i->nat_tree_lock); + return need; } -bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - bool fsynced = false; + bool is_cp = true; - read_lock(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ino); - if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) - fsynced = true; - read_unlock(&nm_i->nat_tree_lock); - return fsynced; + down_read(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e && !get_nat_flag(e, IS_CHECKPOINTED)) + is_cp = false; + up_read(&nm_i->nat_tree_lock); + return is_cp; } bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) @@ -208,13 +232,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need_update; } @@ -222,13 +246,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; - new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); - if (!new) - return NULL; - if (radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + 
f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); nat_reset_flag(new); @@ -241,18 +260,14 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, struct f2fs_nat_entry *ne) { struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } node_info_from_raw_nat(&e->ni, ne); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -260,16 +275,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } - e->ni = *ni; + copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { /* @@ -277,7 +288,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, * previous nat entry can be remained in nat cache. * So, reinitialize it with new information. 
*/ - e->ni = *ni; + copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); } @@ -304,13 +315,14 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, __set_nat_cache_dirty(nm_i, e); /* update fsync_mark if its inode nat entry is still alive */ - e = __lookup_nat_cache(nm_i, ni->ino); + if (ni->nid != ni->ino) + e = __lookup_nat_cache(nm_i, ni->ino); if (e) { if (fsync_done && ni->nid == ni->ino) set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -320,7 +332,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (available_free_memory(sbi, NAT_ENTRIES)) return 0; - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; ne = list_first_entry(&nm_i->nat_entries, @@ -328,7 +340,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); return nr_shrink; } @@ -347,21 +359,22 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) struct nat_entry *e; int i; - memset(&ne, 0, sizeof(struct f2fs_nat_entry)); ni->nid = nid; /* Check nat cache */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); } - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (e) return; + memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + /* Check current segment summary */ mutex_lock(&curseg->curseg_mutex); i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); @@ -472,7 +485,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { 
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct page *npage[4]; - struct page *parent; + struct page *parent = NULL; int offset[4]; unsigned int noffset[4]; nid_t nids[4]; @@ -489,6 +502,14 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (IS_ERR(npage[0])) return PTR_ERR(npage[0]); } + + /* if inline_data is set, should not report any block indices */ + if (f2fs_has_inline_data(dn->inode) && index) { + err = -ENOENT; + f2fs_put_page(npage[0], 1); + goto release_out; + } + parent = npage[0]; if (level != 0) nids[1] = get_nid(parent, offset[0], true); @@ -586,7 +607,7 @@ static void truncate_node(struct dnode_of_data *dn) } invalidate: clear_node_page_dirty(dn->node_page); - F2FS_SET_SB_DIRT(sbi); + set_sbi_flag(sbi, SBI_IS_DIRTY); f2fs_put_page(dn->node_page, 1); @@ -977,10 +998,18 @@ static int read_node_page(struct page *page, int rw) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; + struct f2fs_io_info fio = { + .sbi = sbi, + .type = NODE, + .rw = rw, + .page = page, + .encrypted_page = NULL, + }; get_node_info(sbi, page->index, &ni); if (unlikely(ni.blk_addr == NULL_ADDR)) { + ClearPageUptodate(page); f2fs_put_page(page, 1); return -ENOENT; } @@ -988,7 +1017,8 @@ static int read_node_page(struct page *page, int rw) if (PageUptodate(page)) return LOCKED_PAGE; - return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw); + fio.blk_addr = ni.blk_addr; + return f2fs_submit_page_bio(&fio); } /* @@ -1029,11 +1059,11 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) err = read_node_page(page, READ_SYNC); if (err < 0) return ERR_PTR(err); - else if (err == LOCKED_PAGE) - goto got_it; + else if (err != LOCKED_PAGE) + lock_page(page); - lock_page(page); if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { + ClearPageUptodate(page); f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -1041,7 +1071,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) f2fs_put_page(page, 
1); goto repeat; } -got_it: + mark_page_accessed(page); return page; } @@ -1096,6 +1126,7 @@ struct page *get_node_page_ra(struct page *parent, int start) f2fs_put_page(page, 1); return ERR_PTR(-EIO); } + mark_page_accessed(page); return page; } @@ -1182,13 +1213,9 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, /* called by fsync() */ if (ino && IS_DNODE(page)) { set_fsync_mark(page, 1); - if (IS_INODE(page)) { - if (!is_checkpointed_node(sbi, ino) && - !has_fsynced_inode(sbi, ino)) - set_dentry_mark(page, 1); - else - set_dentry_mark(page, 0); - } + if (IS_INODE(page)) + set_dentry_mark(page, + need_dentry_mark(sbi, ino)); nwritten++; } else { set_fsync_mark(page, 0); @@ -1269,16 +1296,18 @@ static int f2fs_write_node_page(struct page *page, { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; - block_t new_addr; struct node_info ni; struct f2fs_io_info fio = { + .sbi = sbi, .type = NODE, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, + .page = page, + .encrypted_page = NULL, }; trace_f2fs_writepage(page, NODE); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; @@ -1293,21 +1322,30 @@ static int f2fs_write_node_page(struct page *page, /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { + ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); unlock_page(page); return 0; } - if (wbc->for_reclaim) - goto redirty_out; + if (wbc->for_reclaim) { + if (!down_read_trylock(&sbi->node_write)) + goto redirty_out; + } else { + down_read(&sbi->node_write); + } - down_read(&sbi->node_write); set_page_writeback(page); - write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); - set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); + fio.blk_addr = ni.blk_addr; + write_node_page(nid, &fio); + set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); 
up_read(&sbi->node_write); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, NODE, WRITE); + return 0; redirty_out: @@ -1350,26 +1388,12 @@ static int f2fs_set_node_page_dirty(struct page *page) __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); + f2fs_trace_pid(page); return 1; } return 0; } -static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, - unsigned int length) -{ - struct inode *inode = page->mapping->host; - if (PageDirty(page)) - dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES); - ClearPagePrivate(page); -} - -static int f2fs_release_node_page(struct page *page, gfp_t wait) -{ - ClearPagePrivate(page); - return 1; -} - /* * Structure of the f2fs node operations */ @@ -1377,8 +1401,8 @@ const struct address_space_operations f2fs_node_aops = { .writepage = f2fs_write_node_page, .writepages = f2fs_write_node_pages, .set_page_dirty = f2fs_set_node_page_dirty, - .invalidatepage = f2fs_invalidate_node_page, - .releasepage = f2fs_release_node_page, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, }; static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, @@ -1410,13 +1434,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (build) { /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1425,15 +1449,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i->nid = nid; i->state = NID_NEW; + if (radix_tree_preload(GFP_NOFS)) { + kmem_cache_free(free_nid_slab, i); + return 0; + } + spin_lock(&nm_i->free_nid_list_lock); if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { 
spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); kmem_cache_free(free_nid_slab, i); return 0; } list_add_tail(&i->list, &nm_i->free_nid_list); nm_i->fcnt++; spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); return 1; } @@ -1714,80 +1745,41 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) return 0; } -/* - * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-read pages are allocated in bd_inode's mapping tree. - */ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, - int start, int nrpages) -{ - struct inode *inode = sbi->sb->s_bdev->bd_inode; - struct address_space *mapping = inode->i_mapping; - int i, page_idx = start; - struct f2fs_io_info fio = { - .type = META, - .rw = READ_SYNC | REQ_META | REQ_PRIO - }; - - for (i = 0; page_idx < start + nrpages; page_idx++, i++) { - /* alloc page in bd_inode for reading node summary info */ - pages[i] = grab_cache_page(mapping, page_idx); - if (!pages[i]) - break; - f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); - } - - f2fs_submit_merged_bio(sbi, META, READ); - return i; -} - int restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum) { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct inode *inode = sbi->sb->s_bdev->bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(sbi); - struct page *pages[bio_blocks]; - int i, idx, last_offset, nrpages, err = 0; + int i, idx, last_offset, nrpages; /* scan the node segment */ last_offset = sbi->blocks_per_seg; addr = START_BLOCK(sbi, segno); sum_entry = &sum->entries[0]; - for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { + for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { nrpages = min(last_offset - i, bio_blocks); /* readahead node pages */ - nrpages = ra_sum_pages(sbi, pages, addr, nrpages); - if (!nrpages) - return -ENOMEM; + ra_meta_pages(sbi, addr, nrpages, META_POR); - for (idx 
= 0; idx < nrpages; idx++) { - if (err) - goto skip; + for (idx = addr; idx < addr + nrpages; idx++) { + struct page *page = get_meta_page(sbi, idx); - lock_page(pages[idx]); - if (unlikely(!PageUptodate(pages[idx]))) { - err = -EIO; - } else { - rn = F2FS_NODE(pages[idx]); - sum_entry->nid = rn->footer.nid; - sum_entry->version = 0; - sum_entry->ofs_in_node = 0; - sum_entry++; - } - unlock_page(pages[idx]); -skip: - page_cache_release(pages[idx]); + rn = F2FS_NODE(page); + sum_entry->nid = rn->footer.nid; + sum_entry->version = 0; + sum_entry->ofs_in_node = 0; + sum_entry++; + f2fs_put_page(page, 1); } - invalidate_mapping_pages(inode->i_mapping, addr, + invalidate_mapping_pages(META_MAPPING(sbi), addr, addr + nrpages); } - return err; + return 0; } static void remove_nats_in_journal(struct f2fs_sb_info *sbi) @@ -1804,21 +1796,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); raw_ne = nat_in_journal(sum, i); -retry: - write_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, nid); - if (ne) - goto found; - ne = grab_nat_entry(nm_i, nid); + down_write(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; + ne = grab_nat_entry(nm_i, nid); + node_info_from_raw_nat(&ne->ni, &raw_ne); } - node_info_from_raw_nat(&ne->ni, &raw_ne); -found: __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1852,6 +1838,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; struct page *page = NULL; + struct f2fs_nm_info *nm_i = NM_I(sbi); /* * there are two steps to flush nat entries: @@ -1889,10 +1876,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); - write_lock(&NM_I(sbi)->nat_tree_lock); + 
down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - write_unlock(&NM_I(sbi)->nat_tree_lock); + up_write(&NM_I(sbi)->nat_tree_lock); if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); @@ -1903,10 +1890,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, else f2fs_put_page(page, 1); - if (!set->entry_cnt) { - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); - } + f2fs_bug_on(sbi, set->entry_cnt); + + down_write(&nm_i->nat_tree_lock); + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + up_write(&nm_i->nat_tree_lock); + kmem_cache_free(nat_entry_set_slab, set); } /* @@ -1917,12 +1906,14 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - struct nat_entry_set *setvec[NATVEC_SIZE]; + struct nat_entry_set *setvec[SETVEC_SIZE]; struct nat_entry_set *set, *tmp; unsigned int found; nid_t set_idx = 0; LIST_HEAD(sets); + if (!nm_i->dirty_nat_cnt) + return; /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1931,17 +1922,16 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - if (!nm_i->dirty_nat_cnt) - return; - + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_set(nm_i, - set_idx, NATVEC_SIZE, setvec))) { + set_idx, SETVEC_SIZE, setvec))) { unsigned idx; set_idx = setvec[found - 1]->set + 1; for (idx = 0; idx < found; idx++) __adjust_nat_entry_set(setvec[idx], &sets, MAX_NAT_JENTRIES(sum)); } + up_write(&nm_i->nat_tree_lock); /* flush dirty nats in nat entry set */ list_for_each_entry_safe(set, tmp, &sets, set_list) @@ -1973,13 +1963,13 @@ static int init_node_manager(struct f2fs_sb_info *sbi) 
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); - INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); - INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); + INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - rwlock_init(&nm_i->nat_tree_lock); + init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2015,6 +2005,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next_i; struct nat_entry *natvec[NATVEC_SIZE]; + struct nat_entry_set *setvec[SETVEC_SIZE]; nid_t nid = 0; unsigned int found; @@ -2035,16 +2026,32 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; + nid = nat_get_nid(natvec[found - 1]) + 1; for (idx = 0; idx < found; idx++) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); - write_unlock(&nm_i->nat_tree_lock); + + /* destroy nat set cache */ + nid = 0; + while ((found = __gang_lookup_nat_set(nm_i, + nid, SETVEC_SIZE, setvec))) { + unsigned idx; + + nid = setvec[found - 1]->set + 1; + for (idx = 0; idx < found; idx++) { + /* entry_cnt is not zero, when cp_error was occurred */ + f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list)); + radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set); + kmem_cache_free(nat_entry_set_slab, setvec[idx]); + } + } + up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; @@ -2061,17 +2068,17 @@ int __init create_node_manager_caches(void) free_nid_slab = f2fs_kmem_cache_create("free_nid", sizeof(struct 
free_nid)); if (!free_nid_slab) - goto destory_nat_entry; + goto destroy_nat_entry; nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) - goto destory_free_nid; + goto destroy_free_nid; return 0; -destory_free_nid: +destroy_free_nid: kmem_cache_destroy(free_nid_slab); -destory_nat_entry: +destroy_nat_entry: kmem_cache_destroy(nat_entry_slab); fail: return -ENOMEM; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8d5e6e0d..7427e956 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -25,10 +25,19 @@ /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 +#define SETVEC_SIZE 32 /* return value for read_node_page */ #define LOCKED_PAGE 1 +/* For flag in struct node_info */ +enum { + IS_CHECKPOINTED, /* is it checkpointed before? */ + HAS_FSYNCED_INODE, /* is the inode fsynced before? */ + HAS_LAST_FSYNC, /* has the latest node fsync mark? */ + IS_DIRTY, /* this nat entry is dirty? */ +}; + /* * For node information */ @@ -37,18 +46,11 @@ struct node_info { nid_t ino; /* inode number of the node's owner */ block_t blk_addr; /* block address of the node */ unsigned char version; /* version of the node */ -}; - -enum { - IS_CHECKPOINTED, /* is it checkpointed before? */ - HAS_FSYNCED_INODE, /* is the inode fsynced before? */ - HAS_LAST_FSYNC, /* has the latest node fsync mark? */ - IS_DIRTY, /* this nat entry is dirty? 
*/ + unsigned char flag; /* for node information bits */ }; struct nat_entry { struct list_head list; /* for clean or dirty nat list */ - unsigned char flag; /* for node information bits */ struct node_info ni; /* in-memory node information */ }; @@ -63,20 +65,30 @@ struct nat_entry { #define inc_node_version(version) (++version) +static inline void copy_node_info(struct node_info *dst, + struct node_info *src) +{ + dst->nid = src->nid; + dst->ino = src->ino; + dst->blk_addr = src->blk_addr; + dst->version = src->version; + /* should not copy flag here */ +} + static inline void set_nat_flag(struct nat_entry *ne, unsigned int type, bool set) { unsigned char mask = 0x01 << type; if (set) - ne->flag |= mask; + ne->ni.flag |= mask; else - ne->flag &= ~mask; + ne->ni.flag &= ~mask; } static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) { unsigned char mask = 0x01 << type; - return ne->flag & mask; + return ne->ni.flag & mask; } static inline void nat_reset_flag(struct nat_entry *ne) @@ -106,7 +118,10 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ - DIRTY_DENTS /* indicates dirty dentry pages */ + DIRTY_DENTS, /* indicates dirty dentry pages */ + INO_ENTRIES, /* indicates inode entries */ + EXTENT_CACHE, /* indicates extent cache */ + BASE_CHECK, /* check kernel status */ }; struct nat_entry_set { @@ -192,21 +207,26 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) { unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - f2fs_clear_bit(block_off, nm_i->nat_bitmap); - else - f2fs_set_bit(block_off, nm_i->nat_bitmap); + f2fs_change_bit(block_off, nm_i->nat_bitmap); } static inline void fill_node_footer(struct page *page, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { struct f2fs_node *rn = F2FS_NODE(page); + unsigned int 
old_flag = 0; + if (reset) memset(rn, 0, sizeof(*rn)); + else + old_flag = le32_to_cpu(rn->footer.flag); + rn->footer.nid = cpu_to_le32(nid); rn->footer.ino = cpu_to_le32(ino); - rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); + + /* should remain old flag bits such as COLD_BIT_SHIFT */ + rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) | + (old_flag & OFFSET_BIT_MASK)); } static inline void copy_node_footer(struct page *dst, struct page *src) @@ -323,28 +343,6 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold node blocks in their node footer * - Mark cold data pages in page cache */ -static inline int is_file(struct inode *inode, int type) -{ - return F2FS_I(inode)->i_advise & type; -} - -static inline void set_file(struct inode *inode, int type) -{ - F2FS_I(inode)->i_advise |= type; -} - -static inline void clear_file(struct inode *inode, int type) -{ - F2FS_I(inode)->i_advise &= ~type; -} - -#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) -#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) -#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) -#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) -#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) -#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) - static inline int is_cold_data(struct page *page) { return PageChecked(page); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ebd01322..bae8b8c2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -83,6 +83,11 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out; } + if (file_enc_name(inode)) { + iput(dir); + return 0; + } + name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; @@ -92,11 +97,10 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out_err; } retry: - de = f2fs_find_entry(dir, &name, &page); - if (de && inode->i_ino == 
le32_to_cpu(de->ino)) { - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + de = f2fs_find_entry(dir, &name, &page, 0); + if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out_unmap_put; - } + if (de) { einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { @@ -111,11 +115,11 @@ static int recover_dentry(struct inode *inode, struct page *ipage) iput(einode); goto out_unmap_put; } - f2fs_delete_entry(de, page, einode); + f2fs_delete_entry(de, page, dir, einode); iput(einode); goto retry; } - err = __f2fs_add_link(dir, &name, inode); + err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); if (err) goto out_err; @@ -129,7 +133,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) goto out; out_unmap_put: - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); out_err: iput(dir); @@ -144,6 +148,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage) static void recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); + char *name; inode->i_mode = le16_to_cpu(raw->i_mode); i_size_write(inode, le64_to_cpu(raw->i_size)); @@ -154,8 +159,13 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); + if (file_enc_name(inode)) + name = ""; + else + name = F2FS_INODE(page)->i_name; + f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", - ino_of_node(page), F2FS_INODE(page)->i_name); + ino_of_node(page), name); } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) @@ -170,13 +180,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + ra_meta_pages(sbi, blkaddr, 1, META_POR); + while (1) { struct fsync_inode_entry *entry; - if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr 
>= MAX_BLKADDR(sbi)) + if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; - page = get_meta_page_ra(sbi, blkaddr); + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) break; @@ -185,11 +197,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) goto next; entry = get_fsync_inode(head, ino_of_node(page)); - if (entry) { - if (IS_INODE(page) && is_dent_dnode(page)) - set_inode_flag(F2FS_I(entry->inode), - FI_INC_LINK); - } else { + if (!entry) { if (IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) @@ -210,8 +218,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); - if (err == -ENOENT) + if (err == -ENOENT) { + err = 0; goto next; + } break; } list_add_tail(&entry->list, head); @@ -227,6 +237,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + ra_meta_pages_cond(sbi, blkaddr); } f2fs_put_page(page, 1); return err; @@ -252,6 +264,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, struct f2fs_summary_block *sum_node; struct f2fs_summary sum; struct page *sum_page, *node_page; + struct dnode_of_data tdn = *dn; nid_t ino, nid; struct inode *inode; unsigned int offset; @@ -279,17 +292,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, /* Use the locked dnode page and inode */ nid = le32_to_cpu(sum.nid); if (dn->inode->i_ino == nid) { - struct dnode_of_data tdn = *dn; tdn.nid = nid; + if (!dn->inode_page_locked) + lock_page(dn->inode_page); tdn.node_page = dn->inode_page; tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); - truncate_data_blocks_range(&tdn, 1); - return 0; + goto truncate_out; } else if (dn->nid == nid) { - struct dnode_of_data tdn = *dn; tdn.ofs_in_node = 
le16_to_cpu(sum.ofs_in_node); - truncate_data_blocks_range(&tdn, 1); - return 0; + goto truncate_out; } /* Get the node page */ @@ -313,18 +324,33 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, bidx = start_bidx_of_node(offset, F2FS_I(inode)) + le16_to_cpu(sum.ofs_in_node); - if (ino != dn->inode->i_ino) { - truncate_hole(inode, bidx, bidx + 1); + /* + * if inode page is locked, unlock temporarily, but its reference + * count keeps alive. + */ + if (ino == dn->inode->i_ino && dn->inode_page_locked) + unlock_page(dn->inode_page); + + set_new_dnode(&tdn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) + goto out; + + if (tdn.data_blkaddr == blkaddr) + truncate_data_blocks_range(&tdn, 1); + + f2fs_put_dnode(&tdn); +out: + if (ino != dn->inode->i_ino) iput(inode); - } else { - struct dnode_of_data tdn; - set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); - if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) - return 0; - if (tdn.data_blkaddr != NULL_ADDR) - truncate_data_blocks_range(&tdn, 1); - f2fs_put_page(tdn.node_page, 1); - } + else if (dn->inode_page_locked) + lock_page(dn->inode_page); + return 0; + +truncate_out: + if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr) + truncate_data_blocks_range(&tdn, 1); + if (dn->inode->i_ino == nid && !dn->inode_page_locked) + unlock_page(dn->inode_page); return 0; } @@ -334,7 +360,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int start, end; struct dnode_of_data dn; - struct f2fs_summary sum; struct node_info ni; int err = 0, recovered = 0; @@ -342,6 +367,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (IS_INODE(page)) { recover_inline_xattr(inode, page); } else if (f2fs_has_xattr_block(ofs_of_node(page))) { + /* + * Deprecated; xattr blocks should be found from cold log. + * But, we should remain this for backward compatibility. 
+ */ recover_xattr_data(inode, page, blkaddr); goto out; } @@ -376,7 +405,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, src = datablock_addr(dn.node_page, dn.ofs_in_node); dest = datablock_addr(page, dn.ofs_in_node); - if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { + if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && + is_valid_blkaddr(sbi, dest, META_POR)) { + if (src == NULL_ADDR) { err = reserve_new_block(&dn); /* We should not get -ENOSPC */ @@ -388,18 +419,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (err) goto err; - set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); - /* write dummy data page */ - recover_data_page(sbi, NULL, &sum, src, dest); - update_extent_cache(dest, &dn); + f2fs_replace_block(sbi, &dn, src, dest, + ni.version, false); recovered++; } dn.ofs_in_node++; } - /* write node page in place */ - set_summary(&sum, dn.nid, 0, 0); if (IS_INODE(dn.node_page)) sync_inode_page(&dn); @@ -433,10 +460,12 @@ static int recover_data(struct f2fs_sb_info *sbi, while (1) { struct fsync_inode_entry *entry; - if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) + if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - page = get_meta_page_ra(sbi, blkaddr); + ra_meta_pages_cond(sbi, blkaddr); + + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) { f2fs_put_page(page, 1); @@ -497,7 +526,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ - sbi->por_doing = true; + set_sbi_flag(sbi, SBI_POR_DOING); /* prevent checkpoint */ mutex_lock(&sbi->cp_mutex); @@ -526,11 +555,11 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); if (err) { - truncate_inode_pages_final(NODE_MAPPING(sbi)); - truncate_inode_pages_final(META_MAPPING(sbi)); + truncate_inode_pages(NODE_MAPPING(sbi), 0); + truncate_inode_pages(META_MAPPING(sbi), 0); } - 
sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); if (err) { discard_next_dnode(sbi, blkaddr); @@ -541,7 +570,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) mutex_unlock(&sbi->cp_mutex); } else if (need_writecp) { struct cp_control cpc = { - .reason = CP_SYNC, + .reason = CP_RECOVERY, }; mutex_unlock(&sbi->cp_mutex); write_checkpoint(sbi, &cpc); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 923cb76f..76f20d48 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -16,10 +16,12 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" #include "node.h" +#include "trace.h" #include #define __reverse_ffz(x) __reverse_ffs(~(x)) @@ -28,6 +30,65 @@ static struct kmem_cache *discard_entry_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; +/** + * Copied from latest lib/llist.c + * llist_for_each_entry_safe - iterate over some deleted entries of + * lock-less list of given type + * safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @node: the first entry of deleted list entries. + * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. 
+ */ +#define llist_for_each_entry_safe(pos, n, node, member) \ + for (pos = llist_entry((node), typeof(*pos), member); \ + &pos->member != NULL && \ + (n = llist_entry(pos->member.next, typeof(*n), member), true); \ + pos = n) + +/** + * Copied from latest lib/llist.c + * llist_reverse_order - reverse order of a llist chain + * @head: first item of the list to be reversed + * + * Reverse the order of a chain of llist entries and return the + * new first entry. + */ +struct llist_node *llist_reverse_order(struct llist_node *head) +{ + struct llist_node *new_head = NULL; + + while (head) { + struct llist_node *tmp = head; + head = head->next; + tmp->next = new_head; + new_head = tmp; + } + + return new_head; +} + +/** + * Copied from latest linux/list.h + * list_last_entry - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + */ +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + /* * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since * MSB and LSB are reversed in a byte by f2fs_set_bit. @@ -74,6 +135,14 @@ static inline unsigned long __reverse_ffs(unsigned long word) static unsigned long __find_rev_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { + while (!f2fs_test_bit(offset, (unsigned char *)addr)) + offset++; + + if (offset > size) + offset = size; + + return offset; +#if 0 const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; @@ -120,11 +189,20 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr, return result + size; /* Nope. 
*/ found_middle: return result + __reverse_ffs(tmp); +#endif } static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { + while (f2fs_test_bit(offset, (unsigned char *)addr)) + offset++; + + if (offset > size) + offset = size; + + return offset; +#if 0 const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; @@ -172,24 +250,41 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, return result + size; /* Nope. */ found_middle: return result + __reverse_ffz(tmp); +#endif } void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; + int err; + + SetPagePrivate(page); + f2fs_trace_pid(page); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ new->page = page; INIT_LIST_HEAD(&new->list); - +retry: /* increase reference count with clean state */ mutex_lock(&fi->inmem_lock); + err = radix_tree_insert(&fi->inmem_root, page->index, new); + if (err == -EEXIST) { + mutex_unlock(&fi->inmem_lock); + kmem_cache_free(inmem_entry_slab, new); + return; + } else if (err) { + mutex_unlock(&fi->inmem_lock); + goto retry; + } get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); + + trace_f2fs_register_inmem_page(page, INMEM); } void commit_inmem_pages(struct inode *inode, bool abort) @@ -199,33 +294,54 @@ void commit_inmem_pages(struct inode *inode, bool abort) struct inmem_pages *cur, *tmp; bool submit_bio = false; struct f2fs_io_info fio = { + .sbi = sbi, .type = DATA, - .rw = WRITE_SYNC, + .rw = WRITE_SYNC | REQ_PRIO, + .encrypted_page = NULL, }; - f2fs_balance_fs(sbi); - f2fs_lock_op(sbi); + /* + * The abort is true only when f2fs_evict_inode is called. 
+ * Basically, the f2fs_evict_inode doesn't produce any data writes, so + * that we don't need to call f2fs_balance_fs. + * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this + * inode becomes free by iget_locked in f2fs_iget. + */ + if (!abort) { + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); + } mutex_lock(&fi->inmem_lock); list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { - lock_page(cur->page); - if (!abort && cur->page->mapping == inode->i_mapping) { - f2fs_wait_on_page_writeback(cur->page, DATA); - if (clear_page_dirty_for_io(cur->page)) - inode_dec_dirty_pages(inode); - do_write_data_page(cur->page, &fio); - submit_bio = true; + if (!abort) { + lock_page(cur->page); + if (cur->page->mapping == inode->i_mapping) { + f2fs_wait_on_page_writeback(cur->page, DATA); + if (clear_page_dirty_for_io(cur->page)) + inode_dec_dirty_pages(inode); + trace_f2fs_commit_inmem_page(cur->page, INMEM); + fio.page = cur->page; + do_write_data_page(&fio); + submit_bio = true; + } + f2fs_put_page(cur->page, 1); + } else { + trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); + put_page(cur->page); } - f2fs_put_page(cur->page, 1); + radix_tree_delete(&fi->inmem_root, cur->page->index); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } - if (submit_bio) - f2fs_submit_merged_bio(sbi, DATA, WRITE); mutex_unlock(&fi->inmem_lock); - filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); - f2fs_unlock_op(sbi); + if (!abort) { + f2fs_unlock_op(sbi); + if (submit_bio) + f2fs_submit_merged_bio(sbi, DATA, WRITE); + } } /* @@ -238,7 +354,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. 
*/ - if (has_not_enough_free_secs(sbi, 0)) { + if (has_not_enough_free_secs(sbi, 0) && + !is_sbi_flag_set(sbi, SBI_NO_GC)) { mutex_lock(&sbi->gc_mutex); f2fs_gc(sbi); } @@ -246,9 +363,14 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { + /* try to shrink extent cache when there is no enough memory */ + f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); + /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || - excess_prefree_segs(sbi)) + excess_prefree_segs(sbi) || + !available_free_memory(sbi, INO_ENTRIES) || + jiffies > sbi->cp_expires) f2fs_sync_fs(sbi->sb, true); } @@ -427,69 +549,91 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, { sector_t start = SECTOR_FROM_BLOCK(blkstart); sector_t len = SECTOR_FROM_BLOCK(blklen); + struct seg_entry *se; + unsigned int offset; + block_t i; + + for (i = blkstart; i < blkstart + blklen; i++) { + se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); + offset = GET_BLKOFF_FROM_SEG0(sbi, i); + + if (!f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + } trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); } void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) { - if (f2fs_issue_discard(sbi, blkaddr, 1)) { - struct page *page = grab_meta_page(sbi, blkaddr); - /* zero-filled page */ - set_page_dirty(page); - f2fs_put_page(page, 1); + int err = -ENOTSUPP; + + if (test_opt(sbi, DISCARD)) { + struct seg_entry *se = get_seg_entry(sbi, + GET_SEGNO(sbi, blkaddr)); + unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + if (f2fs_test_bit(offset, se->discard_map)) + return; + + err = f2fs_issue_discard(sbi, blkaddr, 1); + } + + if (err) + update_meta_page(sbi, NULL, blkaddr); +} + +static void __add_discard_entry(struct f2fs_sb_info *sbi, + struct cp_control *cpc, struct seg_entry *se, + unsigned int start, 
unsigned int end) +{ + struct list_head *head = &SM_I(sbi)->discard_list; + struct discard_entry *new, *last; + + if (!list_empty(head)) { + last = list_last_entry(head, struct discard_entry, list); + if (START_BLOCK(sbi, cpc->trim_start) + start == + last->blkaddr + last->len) { + last->len += end - start; + goto done; + } } + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; + new->len = end - start; + list_add_tail(&new->list, head); +done: + SM_I(sbi)->nr_discards += end - start; } static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &SM_I(sbi)->discard_list; - struct discard_entry *new; int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; - unsigned long dmap[entries]; + unsigned long *discard_map = (unsigned long *)se->discard_map; + unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); int i; - if (!force && !test_opt(sbi, DISCARD)) + if (se->valid_blocks == max_blocks) return; - if (force && !se->valid_blocks) { - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - /* - * if this segment is registered in the prefree list, then - * we should skip adding a discard candidate, and let the - * checkpoint do that later. 
- */ - mutex_lock(&dirty_i->seglist_lock); - if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { - mutex_unlock(&dirty_i->seglist_lock); - cpc->trimmed += sbi->blocks_per_seg; + if (!force) { + if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) return; - } - mutex_unlock(&dirty_i->seglist_lock); - - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start); - new->len = sbi->blocks_per_seg; - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += sbi->blocks_per_seg; - cpc->trimmed += sbi->blocks_per_seg; - return; } - /* zero block will be discarded through the prefree list */ - if (!se->valid_blocks || se->valid_blocks == max_blocks) - return; - /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] : + (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); @@ -497,18 +641,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); - - if (end - start < cpc->trim_minlen) - continue; - - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - cpc->trimmed += end - start; - - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += end - start; + __add_discard_entry(sbi, cpc, se, start, end); } } @@ -538,7 +671,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -void clear_prefree_segments(struct f2fs_sb_info *sbi) +void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control 
*cpc) { struct list_head *head = &(SM_I(sbi)->discard_list); struct discard_entry *entry, *this; @@ -571,7 +704,11 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { + if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) + goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); + cpc->trimmed += entry->len; +skip: list_del(&entry->list); SM_I(sbi)->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); @@ -620,11 +757,15 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); + if (!f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; } else { - if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); + if (f2fs_test_and_clear_bit(offset, se->discard_map)) + sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks += del; @@ -683,7 +824,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, /* * Calculate the number of current summary pages for writing */ -int npages_for_summary_flush(struct f2fs_sb_info *sbi) +int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) { int valid_sum_count = 0; int i, sum_in_page; @@ -691,8 +832,13 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { if (sbi->ckpt->alloc_type[i] == SSR) valid_sum_count += sbi->blocks_per_seg; - else - valid_sum_count += curseg_blkoff(sbi, i); + else { + if (for_ra) + valid_sum_count += le16_to_cpu( + F2FS_CKPT(sbi)->cur_data_blkoff[i]); + else + valid_sum_count += curseg_blkoff(sbi, i); + } } sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - @@ -713,16 +859,25 @@ struct page 
*get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); } -static void write_sum_page(struct f2fs_sb_info *sbi, - struct f2fs_summary_block *sum_blk, block_t blk_addr) +void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { struct page *page = grab_meta_page(sbi, blk_addr); - void *kaddr = page_address(page); - memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); + void *dst = page_address(page); + + if (src) + memcpy(dst, src, PAGE_CACHE_SIZE); + else + memset(dst, 0, PAGE_CACHE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } +static void write_sum_page(struct f2fs_sb_info *sbi, + struct f2fs_summary_block *sum_blk, block_t blk_addr) +{ + update_meta_page(sbi, (void *)sum_blk, blk_addr); +} + static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -751,7 +906,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, int go_left = 0; int i; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, @@ -824,7 +979,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); __set_inuse(sbi, segno); *newseg = segno; - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) @@ -875,7 +1030,7 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi, { struct seg_entry *se = get_seg_entry(sbi, seg->segno); int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); - unsigned long target_map[entries]; + unsigned long *target_map = SIT_I(sbi)->tmp_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long *cur_map = (unsigned long *)se->cur_valid_map; int i, pos; @@ -975,18 +1130,22 @@ static void allocate_segment_by_default(struct f2fs_sb_info 
*sbi, stat_inc_seg_type(sbi, curseg); } +static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int old_segno; + + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + locate_dirty_segment(sbi, old_segno); +} + void allocate_new_segments(struct f2fs_sb_info *sbi) { - struct curseg_info *curseg; - unsigned int old_curseg; int i; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - curseg = CURSEG_I(sbi, i); - old_curseg = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_curseg); - } + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) + __allocate_new_segments(sbi, i); } static const struct segment_allocation default_salloc_ops = { @@ -995,15 +1154,15 @@ static const struct segment_allocation default_salloc_ops = { int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { - __u64 start = range->start >> sbi->log_blocksize; - __u64 end = start + (range->len >> sbi->log_blocksize) - 1; + __u64 start = F2FS_BYTES_TO_BLK(range->start); + __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; unsigned int start_segno, end_segno; struct cp_control cpc; - if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) || - range->len < sbi->blocksize) + if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; + cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -1012,15 +1171,28 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? 
MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); cpc.reason = CP_DISCARD; - cpc.trim_start = start_segno; - cpc.trim_end = end_segno; - cpc.trim_minlen = range->minlen >> sbi->log_blocksize; - cpc.trimmed = 0; + cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); /* do checkpoint to issue discard commands safely */ - write_checkpoint(sbi, &cpc); + for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { + cpc.trim_start = start_segno; + + if (sbi->discard_blks == 0) + break; + else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) + cpc.trim_end = end_segno; + else + cpc.trim_end = min_t(unsigned int, + rounddown(start_segno + + BATCHED_TRIM_SEGMENTS(sbi), + sbi->segs_per_sec) - 1, end_segno); + + mutex_lock(&sbi->gc_mutex); + write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); + } out: - range->len = cpc.trimmed << sbi->log_blocksize; + range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return 0; } @@ -1050,8 +1222,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && !is_cold_node(page)) - return CURSEG_HOT_NODE; + if (IS_DNODE(page) && is_cold_node(page)) + return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } @@ -1097,10 +1269,18 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; + bool direct_io = (type == CURSEG_DIRECT_IO); + + type = direct_io ? 
CURSEG_WARM_DATA : type; curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + /* direct_io'ed data is aligned to the segment for better performance */ + if (direct_io && curseg->next_blkoff) + __allocate_new_segments(sbi, type); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); @@ -1111,7 +1291,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, */ __add_sum_entry(sbi, type, sum); - mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); stat_inc_block_count(sbi, curseg); @@ -1132,84 +1311,95 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); } -static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, struct f2fs_io_info *fio) +static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(page, fio->type); + int type = __get_segment_type(fio->page, fio->type); - allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); + allocate_data_block(fio->sbi, fio->page, fio->blk_addr, + &fio->blk_addr, sum, type); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); + f2fs_submit_page_mbio(fio); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_io_info fio = { + .sbi = sbi, .type = META, - .rw = WRITE_SYNC | REQ_META | REQ_PRIO + .rw = WRITE_SYNC | REQ_META | REQ_PRIO, + .blk_addr = page->index, + .page = page, + .encrypted_page = NULL, }; set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, page->index, &fio); + f2fs_submit_page_mbio(&fio); } -void write_node_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_io_info *fio, - unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) +void write_node_page(unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; + set_summary(&sum, 
nid, 0, 0); - do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio); + do_write_page(&sum, fio); } -void write_data_page(struct page *page, struct dnode_of_data *dn, - block_t *new_blkaddr, struct f2fs_io_info *fio) +void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; struct node_info ni; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - - do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio); + do_write_page(&sum, fio); + dn->data_blkaddr = fio->blk_addr; } -void rewrite_data_page(struct page *page, block_t old_blkaddr, - struct f2fs_io_info *fio) +void rewrite_data_page(struct f2fs_io_info *fio) { - f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); + stat_inc_inplace_blocks(fio->sbi); + f2fs_submit_page_mbio(fio); } -void recover_data_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) +static void __f2fs_replace_block(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr, + bool recover_curseg) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; unsigned int segno, old_cursegno; struct seg_entry *se; int type; + unsigned short old_blkoff; segno = GET_SEGNO(sbi, new_blkaddr); se = get_seg_entry(sbi, segno); type = se->type; - if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { - if (old_blkaddr == NULL_ADDR) - type = CURSEG_COLD_DATA; - else + if (!recover_curseg) { + /* for recovery flow */ + if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (old_blkaddr == NULL_ADDR) + type = CURSEG_COLD_DATA; + else + type = CURSEG_WARM_DATA; + } + } else { + if (!IS_CURSEG(sbi, segno)) type = CURSEG_WARM_DATA; } + curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); 
mutex_lock(&sit_i->sentry_lock); old_cursegno = curseg->segno; + old_blkoff = curseg->next_blkoff; /* change the current segment */ if (segno != curseg->segno) { @@ -1223,30 +1413,67 @@ void recover_data_page(struct f2fs_sb_info *sbi, refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); locate_dirty_segment(sbi, old_cursegno); + if (recover_curseg) { + if (old_cursegno != curseg->segno) { + curseg->next_segno = old_cursegno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = old_blkoff; + } + mutex_unlock(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); } +void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, + block_t old_addr, block_t new_addr, + unsigned char version, bool recover_curseg) +{ + struct f2fs_summary sum; + + set_summary(&sum, dn->nid, dn->ofs_in_node, version); + + __f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg); + + dn->data_blkaddr = new_addr; + set_data_blkaddr(dn); + f2fs_update_extent_cache(dn); +} + static inline bool is_merged_page(struct f2fs_sb_info *sbi, struct page *page, enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = &sbi->write_io[btype]; struct bio_vec *bvec; + struct page *target; int i; down_read(&io->io_rwsem); - if (!io->bio) - goto out; + if (!io->bio) { + up_read(&io->io_rwsem); + return false; + } bio_for_each_segment_all(bvec, io->bio, i) { - if (page == bvec->bv_page) { + + if (bvec->bv_page->mapping) { + target = bvec->bv_page; + } else { + struct f2fs_crypto_ctx *ctx; + + /* encrypted page */ + ctx = (struct f2fs_crypto_ctx *)page_private( + bvec->bv_page); + target = ctx->w.control_page; + } + + if (page == target) { up_read(&io->io_rwsem); return true; } } -out: up_read(&io->io_rwsem); return false; } @@ -1339,7 +1566,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) segno = le32_to_cpu(ckpt->cur_data_segno[type]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - CURSEG_HOT_DATA]); - if 
(is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); else blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); @@ -1348,7 +1575,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) CURSEG_HOT_NODE]); blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - CURSEG_HOT_NODE]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, type - CURSEG_HOT_NODE); else @@ -1359,7 +1586,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { + if (__exist_node_summaries(sbi)) { struct f2fs_summary *ns = &sum->entries[0]; int i; for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { @@ -1396,12 +1623,22 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) int err; if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + int npages = npages_for_summary_flush(sbi, true); + + if (npages >= 2) + ra_meta_pages(sbi, start_sum_block(sbi), npages, + META_CP); + /* restore for compacted data summary */ if (read_compacted_summaries(sbi)) return -EINVAL; type = CURSEG_HOT_NODE; } + if (__exist_node_summaries(sbi)) + ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), + NR_CURSEG_TYPE - type, META_CP); + for (; type <= CURSEG_COLD_NODE; type++) { err = read_normal_summaries(sbi, type); if (err) @@ -1495,8 +1732,7 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) - write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); + write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, @@ -1524,17 +1760,7 @@ int 
lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return get_meta_page(sbi, blk_addr); + return get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, @@ -1664,6 +1890,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); + if (!sit_i->dirty_sentries) + goto out; + /* * add and account sit entries of dirty bitmap in sit entry * set temporarily @@ -1678,16 +1907,13 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) remove_sits_in_journal(sbi); - if (!sit_i->dirty_sentries) - goto out; - /* * there are two steps to flush sit entries: * #1, flush sit entries to journal in current cold data summary block. * #2, flush sit entries to sit page. 
*/ list_for_each_entry_safe(ses, tmp, head, set_list) { - struct page *page; + struct page *page = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, @@ -1710,7 +1936,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) se = get_seg_entry(sbi, segno); /* add discard candidates */ - if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { + if (cpc->reason != CP_DISCARD) { cpc->trim_start = segno; add_discard_addrs(sbi, cpc); } @@ -1784,11 +2010,18 @@ static int build_sit_info(struct f2fs_sb_info *sbi) = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - if (!sit_i->sentries[start].cur_valid_map - || !sit_i->sentries[start].ckpt_valid_map) + sit_i->sentries[start].discard_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map || + !sit_i->sentries[start].ckpt_valid_map || + !sit_i->sentries[start].discard_map) return -ENOMEM; } + sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->tmp_map) + return -ENOMEM; + if (sbi->segs_per_sec > 1) { sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry)); @@ -1853,7 +2086,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); free_i->free_segments = 0; free_i->free_sections = 0; - rwlock_init(&free_i->segmap_lock); + spin_lock_init(&free_i->segmap_lock); return 0; } @@ -1919,6 +2152,11 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) got_it: check_block_count(sbi, start, &sit); seg_info_from_raw_sit(se, &sit); + + /* build discard map only one time */ + memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks; + if (sbi->segs_per_sec > 1) { struct sec_entry *e = get_sec_entry(sbi, start); e->valid_blocks += 
se->valid_blocks; @@ -2066,6 +2304,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->nr_discards = 0; sm_info->max_discards = 0; + sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; + INIT_LIST_HEAD(&sm_info->sit_entry_set); if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { @@ -2166,8 +2406,11 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) for (start = 0; start < MAIN_SEGS(sbi); start++) { kfree(sit_i->sentries[start].cur_valid_map); kfree(sit_i->sentries[start].ckpt_valid_map); + kfree(sit_i->sentries[start].discard_map); } } + kfree(sit_i->tmp_map); + vfree(sit_i->sentries); vfree(sit_i->sec_entries); kfree(sit_i->dirty_sentries_bitmap); @@ -2200,7 +2443,7 @@ int __init create_segment_manager_caches(void) goto fail; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", - sizeof(struct nat_entry_set)); + sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destory_discard_entry; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2495bec1..84963577 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -163,6 +163,7 @@ struct seg_entry { */ unsigned short ckpt_valid_blocks; unsigned char *ckpt_valid_map; + unsigned char *discard_map; unsigned char type; /* segment type like CURSEG_XXX_TYPE */ unsigned long long mtime; /* modification time of the segment */ }; @@ -189,6 +190,7 @@ struct sit_info { char *sit_bitmap; /* SIT bitmap pointer */ unsigned int bitmap_size; /* SIT bitmap size */ + unsigned long *tmp_map; /* bitmap for temporal use */ unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ unsigned int dirty_sentries; /* # of dirty sentries */ unsigned int sents_per_block; /* # of SIT entries per block */ @@ -207,7 +209,7 @@ struct free_segmap_info { unsigned int start_segno; /* start segment number logically */ unsigned int free_segments; /* # of free segments */ unsigned int free_sections; /* # of free sections */ - rwlock_t segmap_lock; /* free segmap lock */ + spinlock_t 
segmap_lock; /* free segmap lock */ unsigned long *free_segmap; /* free segment bitmap */ unsigned long *free_secmap; /* free section bitmap */ }; @@ -318,9 +320,9 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, unsigned int max, unsigned int segno) { unsigned int ret; - read_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); ret = find_next_bit(free_i->free_segmap, max, segno); - read_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); return ret; } @@ -331,16 +333,17 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int start_segno = secno * sbi->segs_per_sec; unsigned int next; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); free_i->free_segments++; - next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); + next = find_next_bit(free_i->free_segmap, + start_segno + sbi->segs_per_sec, start_segno); if (next >= start_segno + sbi->segs_per_sec) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void __set_inuse(struct f2fs_sb_info *sbi, @@ -362,7 +365,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int start_segno = secno * sbi->segs_per_sec; unsigned int next; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (test_and_clear_bit(segno, free_i->free_segmap)) { free_i->free_segments++; @@ -373,7 +376,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_sections++; } } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, @@ -381,13 +384,13 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int secno = segno / sbi->segs_per_sec; - 
write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (!test_and_set_bit(segno, free_i->free_segmap)) { free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) free_i->free_sections--; } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, @@ -460,7 +463,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + @@ -599,13 +602,13 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) { if (segno > TOTAL_SEGS(sbi) - 1) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) { if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } /* @@ -616,11 +619,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, { /* check segment usage */ if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); /* check boundary of a given segment number */ if (segno > TOTAL_SEGS(sbi) - 1) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } #endif @@ -657,10 +660,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { unsigned int block_off = SIT_BLOCK_OFFSET(start); - if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) - f2fs_clear_bit(block_off, sit_i->sit_bitmap); - else - f2fs_set_bit(block_off, sit_i->sit_bitmap); + f2fs_change_bit(block_off, sit_i->sit_bitmap); } static inline unsigned long long 
get_mtime(struct f2fs_sb_info *sbi) @@ -714,6 +714,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) */ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { + if (sbi->sb->s_bdi->dirty_exceeded) + return 0; + if (type == DATA) return sbi->blocks_per_seg; else if (type == NODE) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41d6f700..75f8fe9a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -30,6 +30,7 @@ #include "segment.h" #include "xattr.h" #include "gc.h" +#include "trace.h" #define CREATE_TRACE_POINTS #include @@ -41,6 +42,7 @@ static struct kset *f2fs_kset; enum { Opt_gc_background, Opt_disable_roll_forward, + Opt_norecovery, Opt_discard, Opt_noheap, Opt_user_xattr, @@ -51,14 +53,19 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_inline_data, + Opt_inline_dentry, Opt_flush_merge, Opt_nobarrier, + Opt_fastboot, + Opt_extent_cache, + Opt_noinline_data, Opt_err, }; static match_table_t f2fs_tokens = { {Opt_gc_background, "background_gc=%s"}, {Opt_disable_roll_forward, "disable_roll_forward"}, + {Opt_norecovery, "norecovery"}, {Opt_discard, "discard"}, {Opt_noheap, "no_heap"}, {Opt_user_xattr, "user_xattr"}, @@ -69,8 +76,12 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, + {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, + {Opt_fastboot, "fastboot"}, + {Opt_extent_cache, "extent_cache"}, + {Opt_noinline_data, "noinline_data"}, {Opt_err, NULL}, }; @@ -188,12 +199,14 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); 
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -203,12 +216,14 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle), ATTR_LIST(reclaim_segments), ATTR_LIST(max_small_discards), + ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), ATTR_LIST(max_victim_search), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), + ATTR_LIST(cp_interval), NULL, }; @@ -245,6 +260,7 @@ static void init_once(void *foo) static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct request_queue *q; substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; @@ -282,8 +298,21 @@ static int parse_options(struct super_block *sb, char *options) case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); break; + case Opt_norecovery: + /* this option mounts f2fs with ro */ + set_opt(sbi, DISABLE_ROLL_FORWARD); + if (!f2fs_readonly(sb)) + return -EINVAL; + break; case Opt_discard: - set_opt(sbi, DISCARD); + q = bdev_get_queue(sb->s_bdev); + if (blk_queue_discard(q)) { + set_opt(sbi, DISCARD); + } else { + f2fs_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but " + "the device does not support discard"); + } break; case Opt_noheap: set_opt(sbi, NOHEAP); @@ -340,12 +369,24 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_data: set_opt(sbi, INLINE_DATA); break; + case Opt_inline_dentry: + set_opt(sbi, INLINE_DENTRY); + break; case 
Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; + case Opt_fastboot: + set_opt(sbi, FASTBOOT); + break; + case Opt_extent_cache: + set_opt(sbi, EXTENT_CACHE); + break; + case Opt_noinline_data: + clear_opt(sbi, INLINE_DATA); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -371,8 +412,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; - rwlock_init(&fi->ext.ext_lock); + rwlock_init(&fi->ext_lock); init_rwsem(&fi->i_sem); + INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); @@ -384,6 +426,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; +#ifdef CONFIG_F2FS_FS_ENCRYPTION + fi->i_crypt_info = NULL; +#endif return &fi->vfs_inode; } @@ -396,8 +441,31 @@ static int f2fs_drop_inode(struct inode *inode) * - f2fs_gc -> iput -> evict * - inode_wait_for_writeback(inode) */ - if (!inode_unhashed(inode) && inode->i_state & I_SYNC) + if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { + if (!inode->i_nlink && !is_bad_inode(inode)) { + spin_unlock(&inode->i_lock); + + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode)) + commit_inmem_pages(inode, true); + + sb_start_intwrite(inode->i_sb); + i_size_write(inode, 0); + + if (F2FS_HAS_BLOCKS(inode)) + f2fs_truncate(inode); + + sb_end_intwrite(inode->i_sb); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_I(inode)->i_crypt_info) + f2fs_free_encryption_info(inode, + F2FS_I(inode)->i_crypt_info); +#endif + spin_lock(&inode->i_lock); + } return 0; + } return generic_drop_inode(inode); } @@ -435,8 +503,13 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_stats(sbi); stop_gc_thread(sbi); - /* We don't need to do checkpoint when it's clean */ - if 
(sbi->s_dirty) { + /* + * We don't need to do checkpoint when superblock is clean. + * But, the previous checkpoint was not done by umount, it needs to do + * clean checkpoint again. + */ + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { struct cp_control cpc = { .reason = CP_UMOUNT, }; @@ -473,15 +546,17 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); if (sync) { - struct cp_control cpc = { - .reason = CP_SYNC, - }; + struct cp_control cpc; + + cpc.reason = __get_cp_reason(sbi); + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } else { f2fs_balance_fs(sbi); } + f2fs_trace_ios(NULL, 1); return 0; } @@ -562,10 +637,18 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + else + seq_puts(seq, ",noinline_data"); + if (test_opt(sbi, INLINE_DENTRY)) + seq_puts(seq, ",inline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) seq_puts(seq, ",nobarrier"); + if (test_opt(sbi, FASTBOOT)) + seq_puts(seq, ",fastboot"); + if (test_opt(sbi, EXTENT_CACHE)) + seq_puts(seq, ",extent_cache"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; @@ -611,6 +694,22 @@ static const struct file_operations f2fs_seq_segment_info_fops = { .release = single_release, }; +static void default_options(struct f2fs_sb_info *sbi) +{ + /* init some FS parameters */ + sbi->active_logs = NR_CURSEG_TYPE; + + set_opt(sbi, BG_GC); + set_opt(sbi, INLINE_DATA); + +#ifdef CONFIG_F2FS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif +} + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -629,7 +728,7 @@ static int f2fs_remount(struct 
super_block *sb, int *flags, char *data) active_logs = sbi->active_logs; sbi->mount_opt.opt = 0; - sbi->active_logs = NR_CURSEG_TYPE; + default_options(sbi); /* parse mount options */ err = parse_options(sb, data); @@ -654,7 +753,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_sync_fs(sb, 1); need_restart_gc = true; } - } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + } else if (!sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; @@ -667,7 +766,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { destroy_flush_cmd_control(sbi); - } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) { + } else if (!SM_I(sbi)->cmd_control_info) { err = create_flush_cmd_control(sbi); if (err) goto restore_gc; @@ -752,7 +851,7 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static loff_t max_file_size(unsigned bits) +static loff_t max_file_blocks(void) { loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; @@ -768,7 +867,6 @@ static loff_t max_file_size(unsigned bits) leaf_count *= NIDS_PER_BLOCK; result += leaf_count; - result <<= bits; return result; } @@ -872,7 +970,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; - sbi->need_fsck = false; + sbi->cp_interval = DEF_CP_INTERVAL; + clear_sbi_flag(sbi, SBI_NEED_FSCK); } /* @@ -882,29 +981,36 @@ static void init_sb_info(struct f2fs_sb_info *sbi) */ static int read_raw_super_block(struct super_block *sb, struct f2fs_super_block **raw_super, - struct buffer_head **raw_super_buf) + struct buffer_head **raw_super_buf, + int *recovery) { int block = 0; + struct buffer_head *buffer; + struct f2fs_super_block *super; + int err = 0; retry: - *raw_super_buf = sb_bread(sb, block); - if (!*raw_super_buf) { + buffer = 
sb_bread(sb, block); + if (!buffer) { + *recovery = 1; f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", block + 1); if (block == 0) { block++; goto retry; } else { - return -EIO; + err = -EIO; + goto out; } } - *raw_super = (struct f2fs_super_block *) - ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); + super = (struct f2fs_super_block *) + ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET); /* sanity checking of raw super */ - if (sanity_check_raw_super(sb, *raw_super)) { - brelse(*raw_super_buf); + if (sanity_check_raw_super(sb, super)) { + brelse(buffer); + *recovery = 1; f2fs_msg(sb, KERN_ERR, "Can't find valid F2FS filesystem in %dth superblock", block + 1); @@ -912,24 +1018,76 @@ static int read_raw_super_block(struct super_block *sb, block++; goto retry; } else { - return -EINVAL; + err = -EINVAL; + goto out; } } + if (!*raw_super) { + *raw_super_buf = buffer; + *raw_super = super; + } else { + /* already have a valid superblock */ + brelse(buffer); + } + + /* check the validity of the second superblock */ + if (block == 0) { + block++; + goto retry; + } + +out: + /* No valid superblock */ + if (!*raw_super) + return err; + return 0; } +int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) +{ + struct buffer_head *sbh = sbi->raw_super_buf; + sector_t block = sbh->b_blocknr; + int err; + + /* write back-up superblock first */ + sbh->b_blocknr = block ? 
0 : 1; + mark_buffer_dirty(sbh); + err = sync_dirty_buffer(sbh); + + sbh->b_blocknr = block; + + /* if we are in recovery path, skip writing valid superblock */ + if (recover || err) + goto out; + + /* write current valid superblock */ + mark_buffer_dirty(sbh); + err = sync_dirty_buffer(sbh); +out: + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); + return err; +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; struct buffer_head *raw_super_buf; struct inode *root; - long err = -EINVAL; - bool retry = true; - int i; + long err; + bool retry = true, need_fsck = false; + char *options = NULL; + int recovery, i; try_onemore: + err = -EINVAL; + raw_super = NULL; + raw_super_buf = NULL; + recovery = 0; + /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); if (!sbi) @@ -941,28 +1099,26 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - err = read_raw_super_block(sb, &raw_super, &raw_super_buf); + err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery); if (err) goto free_sbi; sb->s_fs_info = sbi; - /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; - - set_opt(sbi, BG_GC); - -#ifdef CONFIG_F2FS_FS_XATTR - set_opt(sbi, XATTR_USER); -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - set_opt(sbi, POSIX_ACL); -#endif + default_options(sbi); /* parse mount options */ - err = parse_options(sb, (char *)data); - if (err) + options = kstrdup((const char *)data, GFP_KERNEL); + if (data && !options) { + err = -ENOMEM; goto free_sb_buf; + } + + err = parse_options(sb, options); + if (err) + goto free_options; - sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); + sbi->max_file_blocks = max_file_blocks(); + sb->s_maxbytes = sbi->max_file_blocks << + le32_to_cpu(raw_super->log_blocksize); sb->s_max_links = F2FS_LINK_MAX; 
get_random_bytes(&sbi->s_next_generation, sizeof(u32)); @@ -983,7 +1139,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); init_rwsem(&sbi->read_io.io_rwsem); @@ -1004,7 +1160,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(sbi->meta_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_sb_buf; + goto free_options; } err = get_valid_checkpoint(sbi); @@ -1032,6 +1188,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&sbi->dir_inode_list); spin_lock_init(&sbi->dir_inode_lock); + init_extent_cache_info(sbi); + init_ino_entry_info(sbi); /* setup f2fs internal modules */ @@ -1091,14 +1249,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) proc_create_data("segment_info", S_IRUGO, sbi->s_proc, &f2fs_seq_segment_info_fops, sb); - if (test_opt(sbi, DISCARD)) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } - sbi->s_kobj.kset = f2fs_kset; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, @@ -1106,13 +1256,24 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_proc; - if (!retry) - sbi->need_fsck = true; - /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + /* + * mount should be failed, when device has readonly mode, and + * previous checkpoint was not done by clean system shutdown. 
+ */ + if (bdev_read_only(sb->s_bdev) && + !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) { + err = -EROFS; + goto free_kobj; + } + + if (need_fsck) + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = recover_fsync_data(sbi); if (err) { + need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%ld", err); goto free_kobj; @@ -1123,12 +1284,22 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (!f2fs_readonly(sb)) { + if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) goto free_kobj; } + kfree(options); + + /* recover broken superblock */ + if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) { + f2fs_msg(sb, KERN_INFO, "Recover invalid superblock"); + f2fs_commit_super(sbi, true); + } + + sbi->cp_expires = round_jiffies_up(jiffies); + return 0; free_kobj: @@ -1153,6 +1324,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); +free_options: + kfree(options); free_sb_buf: brelse(raw_super_buf); free_sbi: @@ -1160,7 +1333,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* give only one another chance */ if (retry) { - retry = 0; + retry = false; shrink_dcache_sb(sb); goto try_onemore; } @@ -1173,11 +1346,18 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); } +static void kill_f2fs_super(struct super_block *sb) +{ + if (sb->s_root) + set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); + kill_block_super(sb); +} + static struct file_system_type f2fs_fs_type = { .owner = THIS_MODULE, .name = "f2fs", .mount = f2fs_mount, - .kill_sb = kill_block_super, + .kill_sb = kill_f2fs_super, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("f2fs"); @@ 
-1205,6 +1385,8 @@ static int __init init_f2fs_fs(void) { int err; + f2fs_build_trace_ios(); + err = init_inodecache(); if (err) goto fail; @@ -1214,30 +1396,35 @@ static int __init init_f2fs_fs(void) err = create_segment_manager_caches(); if (err) goto free_node_manager_caches; - err = create_gc_caches(); + err = create_checkpoint_caches(); if (err) goto free_segment_manager_caches; - err = create_checkpoint_caches(); + err = create_extent_cache(); if (err) - goto free_gc_caches; + goto free_checkpoint_caches; f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); if (!f2fs_kset) { err = -ENOMEM; - goto free_checkpoint_caches; + goto free_extent_cache; } - err = register_filesystem(&f2fs_fs_type); + err = f2fs_init_crypto(); if (err) goto free_kset; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_crypto; f2fs_create_root_stats(); f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); return 0; +free_crypto: + f2fs_exit_crypto(); free_kset: kset_unregister(f2fs_kset); +free_extent_cache: + destroy_extent_cache(); free_checkpoint_caches: destroy_checkpoint_caches(); -free_gc_caches: - destroy_gc_caches(); free_segment_manager_caches: destroy_segment_manager_caches(); free_node_manager_caches: @@ -1253,12 +1440,14 @@ static void __exit exit_f2fs_fs(void) remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); + f2fs_exit_crypto(); + destroy_extent_cache(); destroy_checkpoint_caches(); - destroy_gc_caches(); destroy_segment_manager_caches(); destroy_node_manager_caches(); destroy_inodecache(); kset_unregister(f2fs_kset); + f2fs_destroy_trace_ios(); } module_init(init_f2fs_fs) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c new file mode 100644 index 00000000..145fb659 --- /dev/null +++ b/fs/f2fs/trace.c @@ -0,0 +1,159 @@ +/* + * f2fs IO tracer + * + * Copyright (c) 2014 Motorola Mobility + * Copyright (c) 2014 Jaegeuk Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the 
terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include + +#include "f2fs.h" +#include "trace.h" + +static RADIX_TREE(pids, GFP_ATOMIC); +static spinlock_t pids_lock; +static struct last_io_info last_io; + +static inline void __print_last_io(void) +{ + if (!last_io.len) + return; + + trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n", + last_io.major, last_io.minor, + last_io.pid, "----------------", + last_io.type, + last_io.fio.rw, last_io.fio.blk_addr, + last_io.len); + memset(&last_io, 0, sizeof(last_io)); +} + +static int __file_type(struct inode *inode, pid_t pid) +{ + if (f2fs_is_atomic_file(inode)) + return __ATOMIC_FILE; + else if (f2fs_is_volatile_file(inode)) + return __VOLATILE_FILE; + else if (S_ISDIR(inode->i_mode)) + return __DIR_FILE; + else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode))) + return __NODE_FILE; + else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode))) + return __META_FILE; + else if (pid) + return __NORMAL_FILE; + else + return __MISC_FILE; +} + +void f2fs_trace_pid(struct page *page) +{ + struct inode *inode = page->mapping->host; + pid_t pid = task_pid_nr(current); + void *p; + + page->private = pid; + + if (radix_tree_preload(GFP_NOFS)) + return; + + spin_lock(&pids_lock); + p = radix_tree_lookup(&pids, pid); + if (p == current) + goto out; + if (p) + radix_tree_delete(&pids, pid); + + f2fs_radix_tree_insert(&pids, pid, current); + + trace_printk("%3x:%3x %4x %-16s\n", + MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), + pid, current->comm); +out: + spin_unlock(&pids_lock); + radix_tree_preload_end(); +} + +void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) +{ + struct inode *inode; + pid_t pid; + int major, minor; + + if (flush) { + __print_last_io(); + return; + } + + inode = fio->page->mapping->host; + pid = page_private(fio->page); + + major = MAJOR(inode->i_sb->s_dev); + minor = MINOR(inode->i_sb->s_dev); + + if 
(last_io.major == major && last_io.minor == minor && + last_io.pid == pid && + last_io.type == __file_type(inode, pid) && + last_io.fio.rw == fio->rw && + last_io.fio.blk_addr + last_io.len == fio->blk_addr) { + last_io.len++; + return; + } + + __print_last_io(); + + last_io.major = major; + last_io.minor = minor; + last_io.pid = pid; + last_io.type = __file_type(inode, pid); + last_io.fio = *fio; + last_io.len = 1; + return; +} + +void f2fs_build_trace_ios(void) +{ + spin_lock_init(&pids_lock); +} + +#define PIDVEC_SIZE 128 +static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index, + unsigned int max_items) +{ + struct radix_tree_iter iter; + void **slot; + unsigned int ret = 0; + + if (unlikely(!max_items)) + return 0; + + radix_tree_for_each_slot(slot, &pids, &iter, first_index) { + results[ret] = iter.index; + if (++ret == PIDVEC_SIZE) + break; + } + return ret; +} + +void f2fs_destroy_trace_ios(void) +{ + pid_t pid[PIDVEC_SIZE]; + pid_t next_pid = 0; + unsigned int found; + + spin_lock(&pids_lock); + while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { + unsigned idx; + + next_pid = pid[found - 1] + 1; + for (idx = 0; idx < found; idx++) + radix_tree_delete(&pids, pid[idx]); + } + spin_unlock(&pids_lock); +} diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h new file mode 100644 index 00000000..67db24ac --- /dev/null +++ b/fs/f2fs/trace.h @@ -0,0 +1,46 @@ +/* + * f2fs IO tracer + * + * Copyright (c) 2014 Motorola Mobility + * Copyright (c) 2014 Jaegeuk Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __F2FS_TRACE_H__ +#define __F2FS_TRACE_H__ + +#ifdef CONFIG_F2FS_IO_TRACE +#include + +enum file_type { + __NORMAL_FILE, + __DIR_FILE, + __NODE_FILE, + __META_FILE, + __ATOMIC_FILE, + __VOLATILE_FILE, + __MISC_FILE, +}; + +struct last_io_info { + int major, minor; + pid_t pid; + enum file_type type; + struct f2fs_io_info fio; + block_t len; +}; + +extern void f2fs_trace_pid(struct page *); +extern void f2fs_trace_ios(struct f2fs_io_info *, int); +extern void f2fs_build_trace_ios(void); +extern void f2fs_destroy_trace_ios(void); +#else +#define f2fs_trace_pid(p) +#define f2fs_trace_ios(i, n) +#define f2fs_build_trace_ios() +#define f2fs_destroy_trace_ios() + +#endif +#endif /* __F2FS_TRACE_H__ */ diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index deca8728..93dd554b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "f2fs.h" #include "xattr.h" @@ -83,7 +82,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -135,7 +134,8 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, if (strcmp(name, "") != 0) return -EINVAL; - *((char *)buffer) = F2FS_I(inode)->i_advise; + if (buffer) + *((char *)buffer) = F2FS_I(inode)->i_advise; return sizeof(char); } @@ -152,6 +152,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, return -EINVAL; F2FS_I(inode)->i_advise |= *(char *)value; + mark_inode_dirty(inode); return 0; } @@ -215,8 +216,8 @@ const struct xattr_handler f2fs_xattr_security_handler = { static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, #ifdef CONFIG_F2FS_FS_POSIX_ACL - 
[F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, - [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler, + [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, + [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, #endif [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, #ifdef CONFIG_F2FS_FS_SECURITY @@ -228,8 +229,8 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = { const struct xattr_handler *f2fs_xattr_handlers[] = { &f2fs_xattr_user_handler, #ifdef CONFIG_F2FS_FS_POSIX_ACL - &posix_acl_access_xattr_handler, - &posix_acl_default_xattr_handler, + &f2fs_xattr_acl_access_handler, + &f2fs_xattr_acl_default_handler, #endif &f2fs_xattr_trusted_handler, #ifdef CONFIG_F2FS_FS_SECURITY @@ -398,7 +399,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } int f2fs_getxattr(struct inode *inode, int index, const char *name, - void *buffer, size_t buffer_size) + void *buffer, size_t buffer_size, struct page *ipage) { struct f2fs_xattr_entry *entry; void *base_addr; @@ -412,7 +413,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - base_addr = read_all_xattrs(inode, NULL); + base_addr = read_all_xattrs(inode, ipage); if (!base_addr) return -ENOMEM; @@ -582,11 +583,21 @@ static int __f2fs_setxattr(struct inode *inode, int index, inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } + if (index == F2FS_XATTR_INDEX_ENCRYPTION && + !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) + f2fs_set_encrypted_inode(inode); if (ipage) update_inode(inode, ipage); else update_inode_page(inode); + +#ifdef CONFIG_F2FS_EMULATED_SD + if (S_ISDIR(inode->i_mode) && len == F2FS_XATTR_DIR_NOCASE_LEN && + !memcmp(name, F2FS_XATTR_DIR_NOCASE, len)) + f2fs_set_nocase_dop(inode); +#endif + exit: kzfree(base_addr); return error; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 34ab7dbc..049471db 
100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -35,6 +35,16 @@ #define F2FS_XATTR_INDEX_LUSTRE 5 #define F2FS_XATTR_INDEX_SECURITY 6 #define F2FS_XATTR_INDEX_ADVISE 7 +/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */ +#define F2FS_XATTR_INDEX_ENCRYPTION 9 + +#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" + +#ifdef CONFIG_F2FS_EMULATED_SD +/* user.nocase xattribute for case-insensitive dir support */ +#define F2FS_XATTR_DIR_NOCASE "nocase" +#define F2FS_XATTR_DIR_NOCASE_LEN 6 +#endif struct f2fs_xattr_header { __le32 h_magic; /* magic number for identification */ @@ -108,6 +118,8 @@ struct f2fs_xattr_entry { #ifdef CONFIG_F2FS_FS_XATTR extern const struct xattr_handler f2fs_xattr_user_handler; extern const struct xattr_handler f2fs_xattr_trusted_handler; +extern const struct xattr_handler f2fs_xattr_acl_access_handler; +extern const struct xattr_handler f2fs_xattr_acl_default_handler; extern const struct xattr_handler f2fs_xattr_advise_handler; extern const struct xattr_handler f2fs_xattr_security_handler; @@ -115,7 +127,8 @@ extern const struct xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, const void *, size_t, struct page *, int); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, + size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else @@ -126,7 +139,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index, return -EOPNOTSUPP; } static inline int f2fs_getxattr(struct inode *inode, int index, - const char *name, void *buffer, size_t buffer_size) + const char *name, void *buffer, + size_t buffer_size, struct page *dpage) { return -EOPNOTSUPP; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 860313a3..920408a2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,12 +19,16 @@ #define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 
bits for 4096 bytes */ #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ +#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) +#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) + /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 @@ -33,7 +37,8 @@ #define F2FS_META_INO(sbi) (sbi->meta_ino_num) /* This flag is used by node and meta inodes, and by recovery */ -#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) +#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) /* * For further optimization on multi-head logs, on-disk layout supports maximum @@ -45,6 +50,8 @@ #define MAX_ACTIVE_NODE_LOGS 8 #define MAX_ACTIVE_DATA_LOGS 8 +#define VERSION_LEN 256 + /* * For superblock */ @@ -81,11 +88,18 @@ struct f2fs_super_block { __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ __le32 cp_payload; + __u8 version[VERSION_LEN]; /* the kernel version */ + __u8 init_version[VERSION_LEN]; /* the initial kernel version */ + __le32 feature; /* defined features */ + __u8 encryption_level; /* versioning level for encryption */ + __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ + __u8 reserved[871]; /* valid reserved region */ } __packed; /* * For checkpoint */ +#define CP_FASTBOOT_FLAG 0x00000020 #define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 #define CP_COMPACT_SUM_FLAG 0x00000004 @@ -147,7 +161,7 @@ struct f2fs_orphan_block { */ struct f2fs_extent { __le32 fofs; /* start file offset of the extent 
*/ - __le32 blk_addr; /* start block address of the extent */ + __le32 blk; /* start block address of the extent */ __le32 len; /* lengh of the extent */ } __packed; @@ -170,14 +184,13 @@ struct f2fs_extent { #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ +#define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ +#define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) -\ - sizeof(__le32) * (DEF_ADDRS_PER_INODE + \ - DEF_NIDS_PER_INODE - 1)) - struct f2fs_inode { __le16 i_mode; /* file mode */ __u8 i_advise; /* file hints */ @@ -225,6 +238,8 @@ enum { OFFSET_BIT_SHIFT }; +#define OFFSET_BIT_MASK (0x07) /* (0x01 << OFFSET_BIT_SHIFT) - 1 */ + struct node_footer { __le32 nid; /* node id */ __le32 ino; /* inode nunmber */ @@ -435,6 +450,24 @@ struct f2fs_dentry_block { __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; +/* for inline dir */ +#define NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + BITS_PER_BYTE + 1)) +#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \ + BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE)) + +/* inline directory entry structure */ +struct f2fs_inline_dentry { + __u8 dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE]; + __u8 reserved[INLINE_RESERVED_SIZE]; + struct f2fs_dir_entry dentry[NR_INLINE_DENTRY]; + __u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN]; +} __packed; + /* file types used in inode_info->flags */ enum { F2FS_FT_UNKNOWN, diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index bbc4de9b..e5a03ef1 100644 --- 
a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -14,7 +14,11 @@ { NODE, "NODE" }, \ { DATA, "DATA" }, \ { META, "META" }, \ - { META_FLUSH, "META_FLUSH" }) + { META_FLUSH, "META_FLUSH" }, \ + { INMEM, "INMEM" }, \ + { INMEM_DROP, "INMEM_DROP" }, \ + { IPU, "IN-PLACE" }, \ + { OPU, "OUT-OF-PLACE" }) #define F2FS_BIO_MASK(t) (t & (READA | WRITE_FLUSH_FUA)) #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) @@ -72,10 +76,13 @@ #define show_cpreason(type) \ __print_symbolic(type, \ { CP_UMOUNT, "Umount" }, \ + { CP_FASTBOOT, "Fastboot" }, \ { CP_SYNC, "Sync" }, \ + { CP_RECOVERY, "Recovery" }, \ { CP_DISCARD, "Discard" }) struct victim_sel_policy; +struct f2fs_map_blocks; DECLARE_EVENT_CLASS(f2fs__inode, @@ -148,14 +155,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, TRACE_EVENT(f2fs_sync_file_exit, - TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), + TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret), TP_ARGS(inode, need_cp, datasync, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(bool, need_cp) + __field(int, need_cp) __field(int, datasync) __field(int, ret) ), @@ -190,7 +197,7 @@ TRACE_EVENT(f2fs_sync_fs, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->dirty = F2FS_SB(sb)->s_dirty; + __entry->dirty = is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY); __entry->wait = wait; ), @@ -440,68 +447,35 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, __entry->err) ); -TRACE_EVENT_CONDITION(f2fs_submit_page_bio, +TRACE_EVENT(f2fs_map_blocks, + TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret), - TP_PROTO(struct page *page, sector_t blkaddr, int type), - - TP_ARGS(page, blkaddr, type), - - TP_CONDITION(page->mapping), + TP_ARGS(inode, map, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(pgoff_t, index) - __field(sector_t, blkaddr) - __field(int, type) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = 
page->mapping->host->i_ino; - __entry->index = page->index; - __entry->blkaddr = blkaddr; - __entry->type = type; - ), - - TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "blkaddr = 0x%llx, bio_type = %s%s", - show_dev_ino(__entry), - (unsigned long)__entry->index, - (unsigned long long)__entry->blkaddr, - show_bio_type(__entry->type)) -); - -TRACE_EVENT(f2fs_get_data_block, - TP_PROTO(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int ret), - - TP_ARGS(inode, iblock, bh, ret), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(sector_t, iblock) - __field(sector_t, bh_start) - __field(size_t, bh_size) + __field(block_t, m_lblk) + __field(block_t, m_pblk) + __field(unsigned int, m_len) __field(int, ret) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->iblock = iblock; - __entry->bh_start = bh->b_blocknr; - __entry->bh_size = bh->b_size; + __entry->m_lblk = map->m_lblk; + __entry->m_pblk = map->m_pblk; + __entry->m_len = map->m_len; __entry->ret = ret; ), TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " - "start blkaddr = 0x%llx, len = 0x%llx bytes, err = %d", + "start blkaddr = 0x%llx, len = 0x%llx, err = %d", show_dev_ino(__entry), - (unsigned long long)__entry->iblock, - (unsigned long long)__entry->bh_start, - (unsigned long long)__entry->bh_size, + (unsigned long long)__entry->m_lblk, + (unsigned long long)__entry->m_pblk, + (unsigned long long)__entry->m_len, __entry->ret) ); @@ -680,11 +654,63 @@ TRACE_EVENT(f2fs_reserve_new_block, __entry->ofs_in_node) ); +DECLARE_EVENT_CLASS(f2fs__submit_page_bio, + + TP_PROTO(struct page *page, struct f2fs_io_info *fio), + + TP_ARGS(page, fio), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, index) + __field(block_t, blkaddr) + __field(int, rw) + __field(int, type) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = 
page->mapping->host->i_ino; + __entry->index = page->index; + __entry->blkaddr = fio->blk_addr; + __entry->rw = fio->rw; + __entry->type = fio->type; + ), + + TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + "blkaddr = 0x%llx, rw = %s%s, type = %s", + show_dev_ino(__entry), + (unsigned long)__entry->index, + (unsigned long long)__entry->blkaddr, + show_bio_type(__entry->rw), + show_block_type(__entry->type)) +); + +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, + + TP_PROTO(struct page *page, struct f2fs_io_info *fio), + + TP_ARGS(page, fio), + + TP_CONDITION(page->mapping) +); + +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, + + TP_PROTO(struct page *page, struct f2fs_io_info *fio), + + TP_ARGS(page, fio), + + TP_CONDITION(page->mapping) +); + DECLARE_EVENT_CLASS(f2fs__submit_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_STRUCT__entry( __field(dev_t, dev) @@ -696,10 +722,10 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->rw = rw; - __entry->type = type; - __entry->sector = bio->bi_iter.bi_sector; - __entry->size = bio->bi_iter.bi_size; + __entry->rw = fio->rw; + __entry->type = fio->type; + __entry->sector = bio->bi_sector; + __entry->size = bio->bi_size; ), TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u", @@ -712,18 +738,20 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_CONDITION(bio) ); DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + 
TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_CONDITION(bio) ); @@ -831,6 +859,13 @@ DEFINE_EVENT(f2fs__page, f2fs_writepage, TP_ARGS(page, type) ); +DEFINE_EVENT(f2fs__page, f2fs_do_write_data_page, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + DEFINE_EVENT(f2fs__page, f2fs_readpage, TP_PROTO(struct page *page, int type), @@ -852,6 +887,20 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, TP_ARGS(page, type) ); +DEFINE_EVENT(f2fs__page, f2fs_register_inmem_page, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + +DEFINE_EVENT(f2fs__page, f2fs_commit_inmem_page, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + TRACE_EVENT(f2fs_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type), @@ -874,7 +923,6 @@ TRACE_EVENT(f2fs_writepages, __field(char, tagged_writepages) __field(char, for_reclaim) __field(char, range_cyclic) - __field(char, for_sync) ), TP_fast_assign( @@ -893,12 +941,11 @@ TRACE_EVENT(f2fs_writepages, __entry->tagged_writepages = wbc->tagged_writepages; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; - __entry->for_sync = wbc->for_sync; ), TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, " "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, " - "kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u", + "kupdate %u background %u tagged %u reclaim %u cyclic %u", show_dev_ino(__entry), show_block_type(__entry->type), show_file_type(__entry->dir), @@ -912,40 +959,7 @@ TRACE_EVENT(f2fs_writepages, __entry->for_background, __entry->tagged_writepages, __entry->for_reclaim, - __entry->range_cyclic, - __entry->for_sync) -); - -TRACE_EVENT(f2fs_submit_page_mbio, - - TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), - - TP_ARGS(page, rw, type, blk_addr), - - TP_STRUCT__entry( - __field(dev_t, 
dev) - __field(ino_t, ino) - __field(int, rw) - __field(int, type) - __field(pgoff_t, index) - __field(block_t, block) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->rw = rw; - __entry->type = type; - __entry->index = page->index; - __entry->block = blk_addr; - ), - - TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx", - show_dev_ino(__entry), - show_bio_type(__entry->rw), - show_block_type(__entry->type), - (unsigned long)__entry->index, - (unsigned long long)__entry->block) + __entry->range_cyclic) ); TRACE_EVENT(f2fs_write_checkpoint, @@ -998,14 +1012,15 @@ TRACE_EVENT(f2fs_issue_discard, TRACE_EVENT(f2fs_issue_flush, - TP_PROTO(struct super_block *sb, bool nobarrier, bool flush_merge), + TP_PROTO(struct super_block *sb, unsigned int nobarrier, + unsigned int flush_merge), TP_ARGS(sb, nobarrier, flush_merge), TP_STRUCT__entry( __field(dev_t, dev) - __field(bool, nobarrier) - __field(bool, flush_merge) + __field(unsigned int, nobarrier) + __field(unsigned int, flush_merge) ), TP_fast_assign( @@ -1019,6 +1034,140 @@ TRACE_EVENT(f2fs_issue_flush, __entry->nobarrier ? "skip (nobarrier)" : "issue", __entry->flush_merge ? 
" with flush_merge" : "") ); + +TRACE_EVENT(f2fs_lookup_extent_tree_start, + + TP_PROTO(struct inode *inode, unsigned int pgofs), + + TP_ARGS(inode, pgofs), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u", + show_dev_ino(__entry), + __entry->pgofs) +); + +TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, + + TP_PROTO(struct inode *inode, unsigned int pgofs, + struct extent_node *en), + + TP_ARGS(inode, pgofs, en), + + TP_CONDITION(en), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(unsigned int, fofs) + __field(u32, blk) + __field(unsigned int, len) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->fofs = en->ei.fofs; + __entry->blk = en->ei.blk; + __entry->len = en->ei.len; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " + "ext_info(fofs: %u, blk: %u, len: %u)", + show_dev_ino(__entry), + __entry->pgofs, + __entry->fofs, + __entry->blk, + __entry->len) +); + +TRACE_EVENT(f2fs_update_extent_tree, + + TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr), + + TP_ARGS(inode, pgofs, blkaddr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(u32, blk) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->blk = blkaddr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, blkaddr = %u", + show_dev_ino(__entry), + __entry->pgofs, + __entry->blk) +); + +TRACE_EVENT(f2fs_shrink_extent_tree, + + TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt, + unsigned int tree_cnt), + + TP_ARGS(sbi, node_cnt, tree_cnt), + + TP_STRUCT__entry( + __field(dev_t, 
dev) + __field(unsigned int, node_cnt) + __field(unsigned int, tree_cnt) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->node_cnt = node_cnt; + __entry->tree_cnt = tree_cnt; + ), + + TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u", + show_dev(__entry), + __entry->node_cnt, + __entry->tree_cnt) +); + +TRACE_EVENT(f2fs_destroy_extent_tree, + + TP_PROTO(struct inode *inode, unsigned int node_cnt), + + TP_ARGS(inode, node_cnt), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, node_cnt) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->node_cnt = node_cnt; + ), + + TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u", + show_dev_ino(__entry), + __entry->node_cnt) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */