Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion kmod/src/forest.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ struct forest_info {
struct delayed_work log_merge_dwork;

atomic64_t inode_count_delta;

u64 merge_dirty_limit_override;
};

#define DECLARE_FOREST_INFO(sb, name) \
Expand Down Expand Up @@ -668,6 +670,7 @@ static void scoutfs_forest_log_merge_worker(struct work_struct *work)
struct scoutfs_key key;
unsigned long delay;
LIST_HEAD(inputs);
u64 dirty_limit;
int ret;

ret = scoutfs_client_get_log_merge(sb, &req);
Expand Down Expand Up @@ -728,10 +731,14 @@ static void scoutfs_forest_log_merge_worker(struct work_struct *work)
goto out;
}

dirty_limit = READ_ONCE(finf->merge_dirty_limit_override);
if (!dirty_limit)
dirty_limit = SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT;

ret = scoutfs_btree_merge(sb, &alloc, &wri, &req.start, &req.end,
&next, &comp.root, &inputs,
!!(req.flags & cpu_to_le64(SCOUTFS_LOG_MERGE_REQUEST_SUBTREE)),
SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT, 10,
dirty_limit, 10,
(2 * 1024 * 1024));
if (ret == -ERANGE) {
comp.remain = next;
Expand Down Expand Up @@ -781,6 +788,9 @@ int scoutfs_forest_setup(struct super_block *sb)
scoutfs_forest_log_merge_worker);
sbi->forest_info = finf;

debugfs_create_u64("log_merge_dirty_limit", 0644, sbi->debug_root,
&finf->merge_dirty_limit_override);

finf->workq = alloc_workqueue("scoutfs_log_merge", WQ_NON_REENTRANT |
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!finf->workq) {
Expand Down
3 changes: 2 additions & 1 deletion tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ BIN := src/createmany \
src/o_tmpfile_umask \
src/o_tmpfile_linkat \
src/mmap_stress \
src/mmap_validate
src/mmap_validate \
src/totl_xattr_fill

DEPS := $(wildcard src/*.d)

Expand Down
4 changes: 4 additions & 0 deletions tests/golden/totl-merge-negative
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
== setup
== update and merge
== verify totals
== cleanup
1 change: 1 addition & 0 deletions tests/sequence
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ simple-xattr-unit.sh
retention-basic.sh
totl-xattr-tag.sh
quota.sh
totl-merge-negative.sh
lock-refleak.sh
lock-shrink-consistency.sh
lock-shrink-read-race.sh
Expand Down
98 changes: 98 additions & 0 deletions tests/src/totl_xattr_fill.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/xattr.h>

/*
 * Exit the program with a formatted error message on stdout when cond
 * is true.  Uses GNU named-variadic-macro syntax (args...) with ##args
 * so it works with zero format arguments.
 */
#define error_exit(cond, fmt, args...) \
do { \
if (cond) { \
printf("error: "fmt"\n", ##args); \
exit(1); \
} \
} while (0)

/* Convenience format-string fragment and matching arguments for errno. */
#define ERRF " errno %d (%s)"
#define ERRA errno, strerror(errno)

/*
 * Doing setxattr() from a shell a few thousand times takes way too
 * long, hence, this program
 */

/*
 * Print the command line help text and exit with failure status.
 * Never returns.
 */
static void usage(void)
{
	static const char help[] =
		"usage: totl_xattr_fill -d DIR -p PREFIX -n COUNT -v VALUE [-c]\n"
		" -d DIR directory containing files\n"
		" -p PREFIX file name prefix\n"
		" -n COUNT number of files (numbered 1..COUNT)\n"
		" -v VALUE xattr value string\n"
		" -c create files before setting xattr\n";

	fputs(help, stdout);
	exit(1);
}

/*
 * Optionally create COUNT files named PREFIX1..PREFIXCOUNT in DIR and
 * set one scoutfs totl xattr ("scoutfs.totl.test.<i>.0.0") to VALUE on
 * each.  Exits non-zero with a message on the first failure.
 *
 * Fixes over the original: snprintf() can return a negative value on
 * encoding error, which the truncation checks now catch; the getopt
 * switch has a default: so any unexpected return prints usage instead
 * of being silently ignored; close() of a just-created fd is checked.
 */
int main(int argc, char **argv)
{
	char *dir = NULL;
	char *prefix = NULL;
	char *value = NULL;
	unsigned long count = 0;
	int create = 0;
	char path[4096];
	char name[256];
	unsigned long i;
	int fd;
	int ret;
	int c;

	while ((c = getopt(argc, argv, "d:p:n:v:c")) != -1) {
		switch (c) {
		case 'd':
			dir = optarg;
			break;
		case 'p':
			prefix = optarg;
			break;
		case 'n':
			count = strtoul(optarg, NULL, 0);
			break;
		case 'v':
			value = optarg;
			break;
		case 'c':
			create = 1;
			break;
		case '?':
		default:
			usage();
		}
	}

	error_exit(!dir, "must specify directory with -d");
	error_exit(!prefix, "must specify prefix with -p");
	error_exit(!count, "must specify count with -n");
	error_exit(!value, "must specify value with -v");

	for (i = 1; i <= count; i++) {
		ret = snprintf(path, sizeof(path), "%s/%s%lu", dir, prefix, i);
		/* ret < 0 is an encoding error, >= size is truncation */
		error_exit(ret < 0 || ret >= (int)sizeof(path),
			   "path too long");

		if (create) {
			fd = open(path, O_CREAT | O_WRONLY, 0644);
			error_exit(fd < 0, "open %s failed"ERRF, path, ERRA);
			error_exit(close(fd), "close %s failed"ERRF,
				   path, ERRA);
		}

		ret = snprintf(name, sizeof(name),
			       "scoutfs.totl.test.%lu.0.0", i);
		error_exit(ret < 0 || ret >= (int)sizeof(name),
			   "xattr name too long");

		ret = setxattr(path, name, value, strlen(value), 0);
		error_exit(ret, "setxattr %s %s failed"ERRF,
			   path, name, ERRA);
	}

	return 0;
}
118 changes: 118 additions & 0 deletions tests/tests/totl-merge-negative.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#
# Test that merge_read_item() correctly updates the sequence number when
# combining delta items from multiple finalized log trees.
#
# A bug in merge_read_item() fails to update found->seq to the max of
# both items' seqs when combining deltas. The combined item retains
# a stale seq from the lower-RID tree processed first.
#
# Two mounts create totl delta items for the same keys. The lower-RID
# mount writes first so it acquires locks first and gets lower seqs.
# Finalized trees are iterated in (rid, nr) key order during merge, so
# the lower-RID tree (with lower seqs) is processed first -- the
# combined item keeps the stale seq from that tree.
#
# An initial round of writes + merge establishes baseline totl values
# and commits all transactions. The second round writes FIRST then
# SECOND in sequence so FIRST opens a fresh transaction first (lower
# trans_seq) and acquires locks first (lower write_seq).
#
# The dirty limit override forces partial merges so that stale-seq
# items are written to fs_root while finalized logs still exist,
# causing readers to double-count finalized deltas.
#
# The read-xattr-totals ioctl uses a sliding cursor that can return
# pre-merge values (< expected) or duplicate entries when btree data
# changes between batches. Only totals ABOVE the expected value
# indicate the double-counting bug.
#

t_require_commands totl_xattr_fill scoutfs
t_require_mounts 2

NR_KEYS=50000
EXPECTED_TOTAL=250

rid_0=$(t_mount_rid 0)
rid_1=$(t_mount_rid 1)

# NOTE(review): [[ < ]] is a lexicographic string comparison; this
# assumes rids are fixed-width strings so string order matches numeric
# order -- confirm against t_mount_rid's output format.
if [[ "$rid_0" < "$rid_1" ]]; then
FIRST=0; SECOND=1
else
FIRST=1; SECOND=0
fi

# Indirect expansion: resolve $T_D0 / $T_D1 (the mount directories)
# through the FIRST/SECOND index chosen above.
FIRST_D="$(eval echo \$T_D$FIRST)"
SECOND_D="$(eval echo \$T_D$SECOND)"

echo "== setup"
totl_xattr_fill -d "$FIRST_D" -p file-first- -n $NR_KEYS -v 100 -c
totl_xattr_fill -d "$SECOND_D" -p file-second- -n $NR_KEYS -v 100 -c
sync
t_force_log_merge

echo "== update and merge"

# FIRST writes first: opens new transaction first (lower trans_seq),
# acquires locks first (lower write_seq).
totl_xattr_fill -d "$FIRST_D" -p file-first- -n $NR_KEYS -v 200
totl_xattr_fill -d "$SECOND_D" -p file-second- -n $NR_KEYS -v 50

# Force partial merges by limiting dirty bytes to one block so that
# stale-seq combined items get written to fs_root while finalized
# logs still exist.
for i in $(t_fs_nrs); do
echo 4096 > $(t_debugfs_path $i)/log_merge_dirty_limit
done

sync

# Start the merge but don't wait for completion -- read totals while
# the merge is running so we can catch the double-counting window.
sv=$(t_server_nr)
last_complete=$(t_counter log_merge_complete $sv)

# Arm the finalize trigger on the server, then wait for it to fire
# (trigger reads back as "1" until it has been consumed).
t_trigger_arm_silent log_merge_force_finalize_ours $sv
t_sync_seq_index
while test "$(t_trigger_get log_merge_force_finalize_ours $sv)" == "1"; do
sleep .1
done

echo "== verify totals"

# Read totals repeatedly while the merge runs. The stale-seq bug
# causes totl_merge_resolve() to double-count finalized deltas for
# keys spliced into fs_root with a stale sequence number, producing
# totals above the expected value.
#
# The sliding cursor may return pre-merge values (below expected) or
# duplicate entries -- these are not the bug. Only totals ABOVE the
# expected value indicate double-counting.
nr_reads=0
while (( $(t_counter log_merge_complete $sv) == last_complete )); do
# Drop cached items on every mount so each read pass presumably
# consults the current btree contents rather than a cache -- confirm
# against the drop_weak_item_cache debugfs implementation.
for i in $(t_fs_nrs); do
echo 1 > $(t_debugfs_path $i)/drop_weak_item_cache
done
# Print any total whose value ($2 after splitting on space/=/,)
# exceeds the expected per-key total.
bad=$(scoutfs read-xattr-totals -p "$T_M0" | \
awk -F'[ =,]+' -v limit=$EXPECTED_TOTAL '$2+0 > limit+0')
nr_reads=$((nr_reads + 1))
if [ -n "$bad" ]; then
echo "double-counted totals during merge (read $nr_reads):"
echo "$bad"
# Restore the dirty limit before failing so later tests aren't
# left with the tiny override.
for i in $(t_fs_nrs); do
echo 0 > $(t_debugfs_path $i)/log_merge_dirty_limit
done
t_fail "stale seq caused double-counted totals"
fi
done

# Restore normal dirty limit.
for i in $(t_fs_nrs); do
echo 0 > $(t_debugfs_path $i)/log_merge_dirty_limit
done

echo "== cleanup"
find "$FIRST_D" -maxdepth 1 -name 'file-first-*' -delete
find "$SECOND_D" -maxdepth 1 -name 'file-second-*' -delete

t_pass