Skip to content

Commit 1b9f98a

Browse files
committed
Add required scripts for LifecycleLoggingTest
1 parent c429acb commit 1b9f98a

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
#!/bin/bash
# check-service-monitor: Search the SN logs for service monitor events
# and check for unexpected service failures. Detects failures that were
# not explicitly injected by the test (typically via node killer) as
# well as cases where services are not restarted because they
# experienced too many restarts in a short period of time.
#
# Usage: check-service-monitor [-vh] [dir]
#
# If the verbose flag is not specified and unexpected failures are
# found, the events are ordered by time and printed to standard error.
#
# If the verbose flag is specified, all service monitor events are
# echoed to standard output, in addition to a final summary line.
#
# Exits with status 0 if SN log files with service lifecycle events are
# found successfully in the specified directory and no unexpected
# service failures are found. Otherwise exits with a non-zero status.
# (The check for service lifecycle events is to make sure that the
# directory actually contains SN logs.)

PROG=$(basename -- "$0")

# The usage text carries explicit \n and \t escapes; usage() expands them.
USAGE="Usage: $PROG [-vh] [dir]\n"
USAGE+=" -v\t Print all service monitor events\n"
USAGE+=" -h\t Print help message\n"
USAGE+=" dir\t The directory to search for logs, defaults to current directory"
readonly PROG USAGE

# Basic regular expressions used to pick log entries.
readonly LIFECYCLE_PAT="LIFECYCLE Service status change:"
readonly MONITOR_PAT="LIFECYCLE Service monitor event:"
readonly FAULT_PAT="event=EXIT \(cause=FAULT\|.* restart=DENIED\)"

# Print the usage message to standard error.
# The string is quoted so that "[-vh]" and "[dir]" can never be expanded
# as glob patterns against files in the current directory.
usage() {
  printf '%b\n' "$USAGE" >&2
}

# Parse the command line.
# Globals:   verbose (written: "true" when -v is given, else "false")
#            dir     (written: directory to search, defaults to $(pwd))
# Arguments: the script's command line arguments
# Exits:     1 after printing usage for -h, for an unknown option, or when
#            more than one directory argument is supplied
parse_args() {
  # Start from a known value so an inherited environment variable cannot
  # silently switch verbose mode on.
  verbose=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -v)
        verbose=true
        shift
        ;;
      -h)
        # Help goes to standard error with status 1, as it always has.
        usage
        exit 1
        ;;
      -*)
        # A real pattern match: anything else starting with '-' is an
        # unknown option rather than a directory name.
        printf 'Error: Unknown option: %s\n' "$1" >&2
        usage
        exit 1
        ;;
      *)
        break
        ;;
    esac
  done

  dir=$(pwd)
  if [[ $# -gt 0 ]]; then
    dir=$1
    shift
  fi

  if [[ $# -gt 0 ]]; then
    printf 'Error: Unexpected extra arguments: %s\n' "$*" >&2
    usage
    exit 1
  fi
}

parse_args "$@"

# All intermediate files live in one private scratch directory that the
# EXIT trap removes on every exit path, including the early error exits.
workdir=$(mktemp -d "/tmp/$PROG.XXXXXX") || exit 1
trap 'rm -rf -- "$workdir"' EXIT

log_list=$workdir/logs           # NUL-delimited list of candidate log files
all_events=$workdir/events       # every lifecycle and monitor entry found
monitor_events=$workdir/monitor  # unique monitor entries, sorted

# Record the file names NUL-delimited so that paths containing whitespace
# or glob characters reach grep intact.
find "$dir" -type f \( -name 'sn*log*' -o -name 'snaboot*log*' \) -print0 \
  > "$log_list" || exit 1

: > "$all_events"
while IFS= read -r -d '' log_file; do
  grep -h -e "$MONITOR_PAT" -e "$LIFECYCLE_PAT" -- "$log_file" \
    >> "$all_events"
  status=$?
  # Status 1 only means this file had no matching lines; anything higher
  # means grep itself failed, independently of the pattern.
  if (( status > 1 )); then
    exit 1
  fi
done < "$log_list"

# No lifecycle or monitor entries at all: the directory most likely does
# not contain SN logs.
if [[ ! -s "$all_events" ]]; then
  echo "Error: No service events found" >&2
  exit 1
fi

# Keep only the monitor events, dropping duplicates. Sorting orders them by
# the leading text of each entry.
grep -e "$MONITOR_PAT" -- "$all_events" | sort -u > "$monitor_events"

if [[ "$verbose" == "true" ]]; then
  # Verbose: show every monitor event, then just test for faults.
  cat -- "$monitor_events"
  grep -e "$FAULT_PAT" -- "$monitor_events" > /dev/null
  grep_status=$?
else
  # Quiet: only the faulty events are shown, on standard error.
  grep -e "$FAULT_PAT" -- "$monitor_events" >&2
  grep_status=$?
fi

if (( grep_status == 0 )); then
  echo "Found faults" >&2
  exit 1
elif (( grep_status > 1 )); then
  # grep failed outright; do not report that as "no faults".
  exit 1
fi

if [[ "$verbose" == "true" ]]; then
  echo "No faults found"
fi
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# check-node-killer-logs: Search the RN, admin, and node killer logs to
# produce a list of lifecycle, fault injection, shutdown request, and
# node killer unexpected events sorted by time, to help debug node
# killer failures.
#
# Usage: check-node-killer-logs [-h] [dir]
#
# NOTE(review): the search further down also includes files named
# 'sn*log*', and its grep pattern only matches "LIFECYCLE Service status
# change" and "[NodeKiller] Unexpected event" entries. Confirm whether
# fault injection and shutdown request entries are meant to be covered.

PROG=$(basename -- "$0")

# The usage text carries explicit \n and \t escapes; usage() expands them.
USAGE="Usage: $PROG [-h] [dir]\n"
USAGE+=" -h\t Print help message\n"
USAGE+=" dir\t The directory to search for logs, defaults to current directory"
readonly PROG USAGE

# Print the usage message to standard error.
# The string is quoted so that "[-h]" and "[dir]" can never be expanded as
# glob patterns against files in the current directory.
usage() {
  printf '%b\n' "$USAGE" >&2
}

# Parse the command line.
# Globals:   dir (written: directory to search, defaults to $(pwd))
# Arguments: the script's command line arguments
# Exits:     1 after printing usage for -h, for an unknown option, or when
#            more than one directory argument is supplied
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h)
        # Help goes to standard error with status 1, as it always has.
        usage
        exit 1
        ;;
      -*)
        # A real pattern match: anything else starting with '-' is an
        # unknown option rather than a directory name.
        printf 'Error: Unknown option: %s\n' "$1" >&2
        usage
        exit 1
        ;;
      *)
        break
        ;;
    esac
  done

  dir=$(pwd)
  if [[ $# -gt 0 ]]; then
    dir=$1
    shift
  fi

  if [[ $# -gt 0 ]]; then
    printf 'Error: Unexpected extra arguments: %s\n' "$*" >&2
    usage
    exit 1
  fi
}

parse_args "$@"

# Print the interesting log entries found under $dir, sorted and de-duplicated.
#
# find selects regular files named 'rg*log*', 'admin*log*', 'sn*log*' or
# 'nodekill.out'. grep keeps the lines containing "LIFECYCLE Service status
# change" or "[NodeKiller] Unexpected event", without file name prefixes.
# Since each entry starts with a timestamp, sorting them will sort them by
# time, using the -u option to remove duplicates.
#
# Globals:   dir (read) - directory to search
# Outputs:   matching log lines on standard output
# Returns:   the exit status of sort, the last stage of the pipeline
collect_events() {
  # "$dir" is quoted so a path with spaces or glob characters stays a single
  # find operand. The two -e patterns replace a '\|' alternation, which is
  # an extension to basic regular expressions.
  find "$dir" \
    -type f \
    \( -name 'rg*log*' -o -name 'admin*log*' -o -name 'sn*log*' \
    -o -name nodekill.out \) \
    -exec grep -h \
    -e 'LIFECYCLE Service status change' \
    -e '\[NodeKiller\] Unexpected event' \
    -- {} + \
    | sort -u
}

collect_events

0 commit comments

Comments
 (0)