Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 220 additions & 3 deletions kmod/src/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -797,10 +797,12 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
struct scoutfs_lock *lock)
{
struct scoutfs_item_count cnt = SIC_TRUNC_EXTENT(inode);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
LIST_HEAD(ind_locks);
s64 ret = 0;

WARN_ON_ONCE(inode && !mutex_is_locked(&inode->i_mutex));
WARN_ON_ONCE(inode && !mutex_is_locked(&si->s_i_mutex));

/* clamp last to the last possible block? */
if (last > SCOUTFS_BLOCK_MAX)
Expand Down Expand Up @@ -1013,7 +1015,7 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock,
u64 offset;
int ret;

WARN_ON_ONCE(create && !mutex_is_locked(&inode->i_mutex));
WARN_ON_ONCE(create && !mutex_is_locked(&si->s_i_mutex));

/* make sure caller holds a cluster lock */
lock = scoutfs_per_task_get(&si->pt_data_lock);
Expand Down Expand Up @@ -1227,6 +1229,7 @@ static int scoutfs_write_begin(struct file *file,
if (!wbd)
return -ENOMEM;

mutex_lock(&si->s_i_mutex); /* released in scoutfs_write_end() */
INIT_LIST_HEAD(&wbd->ind_locks);
*fsdata = wbd;

Expand Down Expand Up @@ -1260,6 +1263,7 @@ static int scoutfs_write_begin(struct file *file,
out:
if (ret) {
scoutfs_inode_index_unlock(sb, &wbd->ind_locks);
mutex_unlock(&si->s_i_mutex);
kfree(wbd);
}
return ret;
Expand Down Expand Up @@ -1326,6 +1330,7 @@ static int scoutfs_write_end(struct file *file, struct address_space *mapping,
pos + ret - BACKGROUND_WRITEBACK_BYTES,
pos + ret - 1);

mutex_unlock(&si->s_i_mutex); /* locked in scoutfs_write_begin() */
return ret;
}

Expand Down Expand Up @@ -1441,6 +1446,7 @@ static int fallocate_extents(struct super_block *sb, struct inode *inode,
long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
const u64 ino = scoutfs_ino(inode);
struct scoutfs_lock *lock = NULL;
Expand All @@ -1451,6 +1457,7 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
int ret;

mutex_lock(&inode->i_mutex);
mutex_lock(&si->s_i_mutex);

/* XXX support more flags */
if (mode & ~(FALLOC_FL_KEEP_SIZE)) {
Expand Down Expand Up @@ -1516,6 +1523,7 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)

out:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
mutex_unlock(&si->s_i_mutex);
mutex_unlock(&inode->i_mutex);

trace_scoutfs_data_fallocate(sb, ino, mode, offset, len, ret);
Expand Down Expand Up @@ -1638,6 +1646,7 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
{
struct super_block *sb = inode->i_sb;
const u64 ino = scoutfs_ino(inode);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct scoutfs_lock *lock = NULL;
struct unpacked_extents *unpe = NULL;
struct unpacked_extent *ext;
Expand All @@ -1657,6 +1666,7 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,

/* XXX overkill? */
mutex_lock(&inode->i_mutex);
mutex_lock(&si->s_i_mutex);

ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
if (ret)
Expand All @@ -1679,6 +1689,9 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
break;
}

/* we can't hold s_i_mutex during copy_to_user(). */
mutex_unlock(&si->s_i_mutex);

for (ext = find_extent(unpe, iblock, last); ext;
ext = next_extent(ext)) {

Expand All @@ -1705,14 +1718,19 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
iblock = unpe->iblock + SCOUTFS_PACKEXT_BLOCKS;
free_unpacked_extents(unpe);
unpe = NULL;

/* we can't hold s_i_mutex during copy_to_user(). */
mutex_lock(&si->s_i_mutex);
}

if (cur.count)
ret = fill_extent(fieinfo, &cur, last_flags);
out:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
mutex_unlock(&si->s_i_mutex);
mutex_unlock(&inode->i_mutex);

if (!ret && cur.count)
ret = fill_extent(fieinfo, &cur, last_flags);

free_unpacked_extents(unpe);

if (ret == 1)
Expand Down Expand Up @@ -1999,6 +2017,204 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
return ret;
}

/* Mostly cribbed from mm/filemap.c:filemap_page_mkwrite() */
static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *inode_lock = NULL;
loff_t old_size = inode->i_size;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
struct write_begin_data wbd;
bool i_size_changed = false;
u64 ind_seq;
loff_t pos;
int ret;
int err;

sb_start_pagefault(sb);
mutex_lock(&si->s_i_mutex);

retry:
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
if (ret) {
ret = VM_FAULT_ERROR;
goto out;
}

if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
/* data_version is per inode, whole file must be online */
ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode),
SEF_OFFLINE,
SCOUTFS_IOC_DWO_WRITE,
&dw, inode_lock);
if (ret != 0)
goto out;
}

file_update_time(vma->vm_file);
lock_page(page);
ret = VM_FAULT_LOCKED;
if (page->mapping != inode->i_mapping) {
unlock_page(page);
ret = VM_FAULT_NOPAGE;
goto out;
}

pos = vmf->pgoff;
pos <<= PAGE_CACHE_SHIFT;

/* scoutfs_write_begin */
memset(&wbd, 0, sizeof(wbd));
INIT_LIST_HEAD(&wbd.ind_locks);
wbd.lock = inode_lock;

do {
err = scoutfs_inode_index_start(sb, &ind_seq) ?:
scoutfs_inode_index_prepare(sb, &wbd.ind_locks, inode,
true) ?:
scoutfs_inode_index_try_lock_hold(sb, &wbd.ind_locks,
ind_seq,
SIC_WRITE_BEGIN());
} while (err > 0);
if (err < 0) {
ret = VM_FAULT_ERROR;
goto out_release_trans;
}

err = __block_write_begin(page, pos, PAGE_SIZE, scoutfs_get_block);
if (err) {
ret = VM_FAULT_ERROR;
goto out_release_trans;
}
/* end scoutfs_write_begin */

/*
* We mark the page dirty already here so that when freeze is in
* progress, we are guaranteed that writeback during freezing will
* see the dirty page and writeprotect it again.
*/
set_page_dirty(page);
wait_for_stable_page(page);

/* start generic_write_end */
if (pos + PAGE_SIZE > inode->i_size) {
i_size_write(inode, pos + PAGE_SIZE);
i_size_changed = true;
}
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
if (i_size_changed)
mark_inode_dirty(inode);
/* end generic_write_end */

/* scoutfs_write_end */
if (!si->staging) {
scoutfs_inode_set_data_seq(inode);
scoutfs_inode_inc_data_version(inode);
}

scoutfs_update_inode_item(inode, wbd.lock, &wbd.ind_locks);
scoutfs_inode_queue_writeback(inode);
out_release_trans:
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &wbd.ind_locks);
/* end scoutfs_write_end */

out:
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_WRITE);
mutex_unlock(&si->s_i_mutex);
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
goto retry;
}

sb_end_pagefault(sb);
return ret;
}

/*
 * Read fault handler: take a cluster read lock on the inode, make sure
 * the faulting page's extent is online (waiting for staging if not),
 * then fall through to generic filemap_fault().
 *
 * have_ret tracks whether filemap_fault() actually ran so we know
 * whether ret holds a VM_FAULT_* code or a scoutfs errno.
 * Returns VM_FAULT_* codes.
 */
int scoutfs_data_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct file *file = vma->vm_file;
	struct inode *inode = file_inode(file);
	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct scoutfs_lock *inode_lock = NULL;
	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
	DECLARE_DATA_WAIT(dw);
	bool have_ret = false;
	loff_t pos;
	int ret;

	/* byte offset of the faulting page */
	pos = vmf->pgoff;
	pos <<= PAGE_CACHE_SHIFT;

retry:
	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
				 SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
	if (ret < 0)
		return VM_FAULT_SIGBUS;

	if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
		/* protect checked extents from stage/release */
		/* s_i_mutex (not i_mutex: mmap_sem ordering) serializes the
		 * i_dio_count bump; dropped via inode_dio_done() at out */
		mutex_lock(&si->s_i_mutex);
		atomic_inc(&inode->i_dio_count);
		mutex_unlock(&si->s_i_mutex);

		ret = scoutfs_data_wait_check(inode, pos, PAGE_SIZE,
					      SEF_OFFLINE, SCOUTFS_IOC_DWO_READ,
					      &dw, inode_lock);
		if (ret != 0) {
			goto out;
		}
	}

	ret = filemap_fault(vma, vmf);
	have_ret = true;

out:
	/* per_task_del tells us whether this task owned the dio count */
	if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent))
		inode_dio_done(inode);
	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
	if (scoutfs_data_wait_found(&dw)) {
		/* an offline extent was found; wait for staging then retry */
		int err = scoutfs_data_wait(inode, &dw);
		if (!have_ret) {
			if (err == 0)
				goto retry;
			/* NOTE(review): returning VM_FAULT_RETRY after
			 * dropping mmap_sem assumes the caller set
			 * FAULT_FLAG_ALLOW_RETRY — confirm for this kernel */
			up_read(&vma->vm_mm->mmap_sem);
			ret = VM_FAULT_RETRY;
			have_ret = true;
		}
	}

	/* wait_check failed with no waiter to block on */
	if (!have_ret)
		ret = VM_FAULT_SIGBUS;

	return ret;
}


/* vm ops for scoutfs regular-file mappings: faults and write faults go
 * through our lock-and-wait wrappers. */
static const struct vm_operations_struct scoutfs_data_file_vm_ops = {
	.page_mkwrite	= scoutfs_data_page_mkwrite,
	.fault		= scoutfs_data_filemap_fault,
	.remap_pages	= generic_file_remap_pages,
};

/*
 * mmap() entry point for regular files: install our vm ops so that
 * faults and write faults take cluster locks and can wait on offline
 * extents.  Always succeeds.
 */
static int scoutfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &scoutfs_data_file_vm_ops;
	file_accessed(file);
	return 0;
}

const struct address_space_operations scoutfs_file_aops = {
.readpage = scoutfs_readpage,
.readpages = scoutfs_readpages,
Expand All @@ -2013,6 +2229,7 @@ const struct file_operations scoutfs_file_fops = {
.write = do_sync_write,
.aio_read = scoutfs_file_aio_read,
.aio_write = scoutfs_file_aio_write,
.mmap = scoutfs_file_mmap,
.unlocked_ioctl = scoutfs_ioctl,
.fsync = scoutfs_file_fsync,
.llseek = scoutfs_file_llseek,
Expand Down
2 changes: 2 additions & 0 deletions kmod/src/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
/* protect checked extents from stage/release */
mutex_lock(&inode->i_mutex);
mutex_lock(&si->s_i_mutex);
atomic_inc(&inode->i_dio_count);
mutex_unlock(&si->s_i_mutex);
mutex_unlock(&inode->i_mutex);

ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
Expand Down
6 changes: 6 additions & 0 deletions kmod/src/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ static void scoutfs_inode_ctor(void *obj)
{
struct scoutfs_inode_info *ci = obj;

mutex_init(&ci->s_i_mutex);
mutex_init(&ci->item_mutex);
seqcount_init(&ci->seqcount);
ci->staging = false;
Expand Down Expand Up @@ -412,6 +413,7 @@ int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock)
int scoutfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
DECLARE_DATA_WAIT(dw);
Expand All @@ -422,6 +424,7 @@ int scoutfs_setattr(struct dentry *dentry, struct iattr *attr)

trace_scoutfs_setattr(dentry, attr);

mutex_lock(&si->s_i_mutex);
retry:
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
Expand Down Expand Up @@ -456,9 +459,11 @@ int scoutfs_setattr(struct dentry *dentry, struct iattr *attr)
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);

/* XXX callee locks instead? */
mutex_unlock(&si->s_i_mutex);
mutex_unlock(&inode->i_mutex);
ret = scoutfs_data_wait(inode, &dw);
mutex_lock(&inode->i_mutex);
mutex_lock(&si->s_i_mutex);

if (ret == 0)
goto retry;
Expand Down Expand Up @@ -492,6 +497,7 @@ int scoutfs_setattr(struct dentry *dentry, struct iattr *attr)
scoutfs_inode_index_unlock(sb, &ind_locks);
out:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
mutex_unlock(&si->s_i_mutex);
return ret;
}

Expand Down
6 changes: 6 additions & 0 deletions kmod/src/inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ struct scoutfs_inode_info {
u64 offline_blocks;
u32 flags;

	/*
	 * We can't use inode->i_mutex to protect i_dio_count due to the
	 * lock ordering in the kernel between i_mutex and mmap_sem.  Use
	 * this as an inner lock instead.
	 */
struct mutex s_i_mutex;

/*
* The in-memory item info caches the current index item values
* so that we can decide to update them with comparisons instead
Expand Down
Loading