Skip to content

Commit 8c28764

Browse files
committed
Update row-size-sampler.sh
include different cqlsh executables. Support paging. Include blob detection
1 parent c1e2c6a commit 8c28764

File tree

1 file changed

+22
-4
lines changed

1 file changed

+22
-4
lines changed

bin/row-size-sampler.sh

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/bin/bash
2+
# Non-interactive bash does not expand aliases unless this option is set;
# the script invokes cqlsh through the `kqlsh` alias it defines further down.
shopt -s expand_aliases
23
# The following script will help gather row size estimates for each table in
34
# a cluster.
45
# It will query each table a fixed number of rows at a low query rate not to
@@ -13,15 +14,32 @@
1314
# The script takes the same parameters as cqlsh to connect to cassandra
1415
# example: ./row-size-sampler.sh cassandra.us-east-1.amazonaws.com 9142 -u "sampleuser" -p "samplepass" --ssl
1516

17+
# Pick the cqlsh executable: prefer an installed cqlsh-expansion, then an installed cqlsh, then a cqlsh file in the current directory.
18+
# Resolve which cqlsh to run and expose it as the `kqlsh` alias.
# Preference order: cqlsh-expansion on PATH, cqlsh on PATH, then ./cqlsh in
# the current directory. Exits with status 1 when none of them is usable.
if [ -x "$(command -v cqlsh-expansion)" ]; then
  echo 'using installed cqlsh-expansion'
  alias kqlsh='cqlsh-expansion'
elif [ -x "$(command -v cqlsh)" ]; then
  echo 'using installed cqlsh'
  alias kqlsh='cqlsh'
elif [ -f ./cqlsh ] && [ -x ./cqlsh ]; then
  # The alias executes ./cqlsh directly, so require an executable regular
  # file; a bare existence test would also accept a directory or a
  # non-executable file and only fail later, on every query.
  echo 'using local cqlsh'
  alias kqlsh='./cqlsh'
else
  # Diagnostics belong on stderr so they are not mixed into the estimates.
  echo 'cqlsh not found' >&2
  exit 1
fi
31+
32+
echo 'starting...'
33+
1634
# grep basic-regex alternation of keyspace names to skip (system and DSE
# internals). The pattern is unanchored, so any keyspace whose name merely
# contains one of these strings (e.g. "system" or "cfs") is skipped as well.
SYSTEMKEYSPACEFILTER='system\|system_schema\|system_traces\|system_auth\|dse_auth\|dse_security\|dse_leases\|system_distributed\|dse_perf\|dse_system\|OpsCenter\|cfs\|cfs_archive\|dsefs\|HiveMetaStore\|spark_system'
1735

1836
# grep basic-regex alternation applied to the word-per-line cqlsh output of
# the table listing: drops words starting with '-', the "table_name" column
# header, and any word containing a parenthesis.
TABLEFILTER='^-\|^table_name\|(\|)'
1937

20-
keyspaces=$(echo desc keyspaces | ./cqlsh $@ | xargs -n1 echo | grep -v $SYSTEMKEYSPACEFILTER)
38+
# List the cluster's keyspaces, one per line, minus the filtered system ones.
# "$@" is quoted so connection arguments containing spaces (for example a
# password) reach cqlsh as single arguments instead of being re-split.
keyspaces=$(echo desc keyspaces | kqlsh "$@" | xargs -n1 echo | grep -v "$SYSTEMKEYSPACEFILTER")
2139
for ks in $keyspaces; do
22-
tables=$(echo "SELECT table_name FROM system_schema.tables WHERE keyspace_name='$ks';" | ./cqlsh $@ | xargs -n1 echo | grep -v $TABLEFILTER)
40+
# List the table names of keyspace $ks, one per line, with the header,
# separator and row-count words of the cqlsh output filtered out.
# "$@" is quoted so connection arguments containing spaces stay intact.
tables=$(echo "SELECT table_name FROM system_schema.tables WHERE keyspace_name='$ks';" | kqlsh "$@" | xargs -n1 echo | grep -v "$TABLEFILTER")
2341
for tb in $tables; do
24-
./cqlsh $@ -e "CONSISTENCY LOCAL_ONE; PAGING 100; SELECT * FROM \"$ks\".\"$tb\" LIMIT 30000;" | grep -v '\[json\]\|rows)\|-----\|^$' | tr -d ' ' | awk -v keyspace=$ks -v table=$tb -F'|' 'BEGIN {columns=0; numSamples=30000; kilobyte=1024; min = "NaN"; max = -1; lines = 1; } { if(NR==2){columns=NF;} if(NR>2){thislen=length($0)+107; total+=thislen; squares+=thislen^2; lines+=1; avg=total/lines; min = (thislen<min ? thislen : min); max = (thislen>max ? thislen : max) }} NR==numSamples {exit} END { printf("%s.%s = { lines: %d, columns: %d, average: %d bytes, stdev: %d bytes, min: %d bytes, max: %d bytes}\n", keyspace, table, lines, columns, avg, sqrt(squares/lines - (avg^2)), min, max); }' >> row-size-estimates.txt 2>&1
25-
./cqlsh $@ -e "DESCRIBE \"$ks\".\"$tb\";" | grep -i blob | while read line; do printf "\t...this table contains a BLOB type, if the majority of row size is from the BLOB, divide the result of the row size in half" ; done
42+
# Sample up to 30000 rows of "$ks"."$tb" and print one summary line with the
# row count, column count, and average / stdev / min / max row size in bytes.
# Row size is the length of the cqlsh output line with spaces removed, plus a
# fixed 107 bytes (presumably per-row overhead -- confirm against the docs).
kqlsh "$@" -e "CONSISTENCY LOCAL_ONE; PAGING 100; SELECT * FROM \"$ks\".\"$tb\" LIMIT 30000;" \
  | grep -v '\[json\]\|rows)\|-----\|^$' \
  | tr -d ' ' \
  | awk -v keyspace="$ks" -v table="$tb" -F'|' '
    # lines counts sampled rows only; it starts at 0 so the average and the
    # standard deviation are divided by the real number of rows.
    BEGIN { columns = 0; numSamples = 30000; min = "NaN"; max = -1; lines = 0 }
    {
      # The second remaining line is used to count the columns
      # (presumably the header row -- confirm against cqlsh output).
      if (NR == 2) { columns = NF }
      if (NR > 2) {
        thislen = length($0) + 107
        total += thislen
        squares += thislen^2
        lines += 1
        min = (thislen < min ? thislen : min)
        max = (thislen > max ? thislen : max)
      }
    }
    NR == numSamples { exit }
    END {
      # Guard the empty-table case (no division by zero) and clamp tiny
      # negative variances caused by floating-point rounding before sqrt.
      avg = (lines > 0 ? total / lines : 0)
      variance = (lines > 0 ? squares / lines - avg^2 : 0)
      if (variance < 0) { variance = 0 }
      printf("%s.%s = { lines: %d, columns: %d, average: %d bytes, stdev: %d bytes, min: %d bytes, max: %d bytes}\n", keyspace, table, lines, columns, avg, sqrt(variance), min, max)
    }'
43+
# Print a single notice when the DESCRIBE output of the table mentions "blob"
# (case-insensitive). Names are passed as printf arguments rather than being
# embedded in the format string, and the notice ends with a newline so the
# next table's summary starts on its own line.
if kqlsh "$@" -e "DESCRIBE \"$ks\".\"$tb\";" | grep -i blob >/dev/null; then
  printf '\t..."%s"."%s" contains a BLOB type, if the majority of row size is from the BLOB, then divide the estimate in half\n' "$ks" "$tb"
fi
2644
done
2745
done

0 commit comments

Comments
 (0)