@@ -1,15 +1,16 @@
 #!/bin/bash
 
-[ -z "$1" ] && echo "Usage: $0 <start|stop|status|env>" && exit 1
+[ -z "$1" ] && echo "Usage: $0 <start|stop|driver|exec|env>" && exit 1
 
 # Version of the Spark image and tooling. Export it.
 export VERSION=3.5.2-12
 export IMAGE_NAME=registry.scality.com/spark/spark-container
 
 # Please change the IPs according to your architecture.
 # Without tuning entrypoint.sh, a worker cannot have a master running on the same host.
-master="10.160.172.70"
-workers="10.160.169.6 10.160.175.70 10.160.168.250 10.160.170.227 10.160.169.48"
+master="10.160.169.162"
+workers="10.160.174.4 10.160.168.21 10.160.171.166 10.160.172.100 10.160.169.238"
+
 
 # Please change the directory where Spark workers will write temporary data
 datadir="/scality/ssd01/spark"
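The colocation constraint in the comment above is easy to violate when editing the IP lists. A hedged sanity check, not part of this commit, could guard it:

```bash
# Warn if the master IP also appears in the worker list, which the
# stock entrypoint.sh does not support.
echo "$workers" | grep -Fqw "$master" && \
  echo "WARNING: master ${master} is also listed as a worker"
```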
@@ -19,6 +20,14 @@ datadir="/scality/ssd01/spark"
 appsdir="/root/spark-apps"
 logsdir="/root/spark-logs"
 
+# Extra hosts you need containers to be able to resolve.
+# Format: "ip1:host1 ip2:host2 ..."
+# Example:
+# extrahosts="10.2.4.1:s3.scality.com
+#             19.10.3.1:microsoft.com"
+
+extrahosts=""
+
 [ "${1}" = "env" ] && return 0
 
 ########### END of tunable variables
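Because of the early `return 0` on `env`, the tunables can be reused from other tooling by sourcing the script. A minimal sketch, assuming the script is saved as `spark_run.sh`:

```bash
#!/bin/bash
# Source only the variable block; spark_run.sh returns right after the
# tunables when called with "env", before any container commands run.
. ./spark_run.sh env
echo "Image: ${IMAGE_NAME}:${VERSION}, master: ${master}, workers: ${workers}"
```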
@@ -46,8 +55,38 @@ test -z "$container_command" && echo "docker or ctr not found!" && exit 1
 echo "Checking the Spark image with $container_command"
 case $container_command
 in
-  docker) version_check_result=$($container_command images --format '{{.Repository}}:{{.Tag}}' | grep "${spark_image_full}") ;;
-  ctr) version_check_result=$($container_command images list | grep "${spark_image_full}" 2>/dev/null) ;;
+  docker) version_check_result=$($container_command images --format '{{.Repository}}:{{.Tag}}' | grep "${spark_image_full}")
+    # extrahosts update: docker expects --add-host=<host>:<ip>, so swap the ip:host pairs
+    add_hosts=""
+    for i in $extrahosts ; do add_hosts+=" --add-host=$(echo $i | awk -F: '{print $2":"$1}')" ; done
+    ;;
+  ctr) version_check_result=$($container_command images list 2>/dev/null | grep "${spark_image_full}")
+    # hosts update
+    host_storage=""
+    count=01
+    localname=
+    for i in $workers ; do
+      n=spark-worker-$(printf "%02d" ${count})
+      ((count++))
+      if echo "$local_ips" | grep -Fqw "$i" ; then n="${n} `hostname -s`" ; fi
+      test "$(grep -qw "$n" /etc/hosts ; echo $?)" == 0 && continue
+      host_storage+="$i $n\n"
+    done
+    for i in $extrahosts ; do
+      test "$(grep -qw "$i" /etc/hosts ; echo $?)" == 0 && continue
+      host_storage+="$(echo $i | sed 's#:# #') # Part of Spark extrahosts $i\n"
+    done
+    test -n "$host_storage" && echo -e "# Host file updated for Spark\n$host_storage" >> /etc/hosts
+
+    # Manage the case where a role has changed on the same node;
+    # spark-master should be known on all nodes
+    if ! grep -q "$master spark-master # set-by-spark_run.sh" /etc/hosts;
+    then
+      grep -q "spark-master # set-by-spark_run.sh" /etc/hosts && \
+        sed -i '/spark-master # set-by-spark_run.sh/d' /etc/hosts
+      echo "$master spark-master # set-by-spark_run.sh" >> /etc/hosts
+    fi
+    ;;
 esac
 
 if [ -n "${version_check_result}" ]; then
@@ -72,10 +111,12 @@
 case $container_command
 in
   docker)
+
     $container_command run --rm --net=host --name=EXEC \
       -v "${appsdir}:/opt/spark/apps:rw" \
       -v "${datadir}:/opt/spark/tmp:rw" \
       -v "${logsdir}:/opt/spark/spark-events:rw" \
+      ${add_hosts} \
       "${spark_image_full}" \
       exec "$@"
     ;;
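A hedged usage sketch for the `exec` path: the trailing `exec "$@"` hands the script's arguments to the image entrypoint inside a throwaway container, with the apps, tmp, and events directories mounted. Exactly which verbs the entrypoint accepts is assumed here:

```bash
# Hypothetical invocation; the spark-submit path and master URL are
# assumptions, not values taken from this commit.
./spark_run.sh exec /opt/spark/bin/spark-submit \
  --master spark://spark-master:7077 \
  /opt/spark/apps/wordcount.py
```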
@@ -96,6 +137,7 @@
       -v "${logsdir}:/opt/spark/spark-events:rw" \
       --add-host="spark-master:${master}" \
       --add-host="$(hostname -s):$(echo ${workers} | awk '{print $1}')" \
+      ${add_hosts} \
       --workdir=/opt/spark/apps \
       "${spark_image_full}" \
       driver
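With the hypothetical `extrahosts` value shown in the tunables section, `${add_hosts}` expands into extra name mappings next to the fixed ones, in docker's `host:ip` order; roughly:

```
--add-host="spark-master:10.160.169.162" \
--add-host="<local hostname>:10.160.174.4" \
--add-host=s3.scality.com:10.2.4.1 \
--add-host=microsoft.com:19.10.3.1 \
```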
@@ -119,7 +161,7 @@
   docker) echo "Running $container_command"
 
     # hosts update
-    host_storage=""
+    host_storage="${add_hosts}"
     count=01
    for i in $workers ; do host_storage+=" --add-host=spark-worker-$(printf "%02d" ${count}):$i" ; ((count++)) ; done
 
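For the five workers configured above, the one-line loop leaves `host_storage` holding one `--add-host` flag per worker, appended after any extrahosts flags it was seeded with:

```
--add-host=spark-worker-01:10.160.174.4
--add-host=spark-worker-02:10.160.168.21
--add-host=spark-worker-03:10.160.171.166
--add-host=spark-worker-04:10.160.172.100
--add-host=spark-worker-05:10.160.169.238
```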
@@ -153,27 +195,6 @@
     fi
     ;;
   ctr) echo "Running $container_command"
-    # hosts update
-    host_storage=""
-    count=01
-    localname=
-    for i in $workers ; do
-      n=spark-worker-$(printf "%02d" ${count})
-      ((count++))
-      if echo "$local_ips" | grep -Fqw "$i" ; then n="${n} `hostname -s`" ; fi
-      test "$(grep -qw "$n" /etc/hosts ; echo $?)" == 0 && continue
-      host_storage+="$i $n\n"
-    done
-    test -n "$host_storage" && echo -e "# Host file updated for Spark\n$host_storage" >> /etc/hosts
-
-    # Manage when a role has changed in the same node
-    # spark-master should be known on all nodes
-    if ! grep -q "$master spark-master # set-by-spark_run.sh" /etc/hosts;
-    then
-      grep -q "spark-master # set-by-spark_run.sh" /etc/hosts && \
-        sed -i '/spark-master # set-by-spark_run.sh/d' /etc/hosts
-      echo "$master spark-master # set-by-spark_run.sh" >> /etc/hosts
-    fi
 
     if [ -n "$local_master" ] ; then
       echo "Running master here"