Skip to content

Commit 41c5822

Browse files
Permit the addition of extra ip:hostname pairs within containers
1 parent: fbcacbc · commit: 41c5822

File tree

1 file changed

+48
-27
lines changed

1 file changed

+48
-27
lines changed

spark_run.sh

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
#!/bin/bash
22

3-
[ -z "$1" ] && echo "Usage: $0 <start|stop|status|env>" && exit 1
3+
[ -z "$1" ] && echo "Usage: $0 <start|stop|driver|exec|env>" && exit 1
44

55
# Version of the Spark image and tooling. Export it.
66
export VERSION=3.5.2-12
77
export IMAGE_NAME=registry.scality.com/spark/spark-container
88

99
# Please change the IPs according to your architecture
1010
# without tuning the entrypoint.sh, a worker cannot have a master running on the same host
11-
master="10.160.172.70"
12-
workers="10.160.169.6 10.160.175.70 10.160.168.250 10.160.170.227 10.160.169.48"
11+
master="10.160.169.162"
12+
workers="10.160.174.4 10.160.168.21 10.160.171.166 10.160.172.100 10.160.169.238"
13+
1314

1415
# Please change the directory where Spark workers will write temporary data
1516

@@ -19,6 +20,14 @@ datadir="/scality/ssd01/spark"
1920
appsdir="/root/spark-apps"
2021
logsdir="/root/spark-logs"
2122

23+
# Extra hosts you need containers to be able to resolve.
24+
# format: "ip1:host1 ip2:host2 ..."
25+
# Example:
26+
# extrahosts="10.2.4.1:s3.scality.com
27+
# 19.10.3.1:microsoft.com"
28+
29+
extrahosts=""
30+
2231
[ "${1}" = "env" ] && return 0
2332

2433
########### END of tunable variables
@@ -46,8 +55,38 @@ test -z "$container_command" && echo "docker or CTR not found!" && exit 1
4655
echo "Checking the Spark image with $container_command"
4756
case $container_command
4857
in
49-
docker) version_check_result=$($container_command images --format '{{.Repository}}:{{.Tag}}' | grep "${spark_image_full}") ;;
50-
ctr) version_check_result=$($container_command images list | grep "${spark_image_full}" 2> /dev/null);;
58+
docker) version_check_result=$($container_command images --format '{{.Repository}}:{{.Tag}}' | grep "${spark_image_full}")
59+
# extrahosts update
60+
add_hosts=""
61+
for i in $extrahosts ; do add_hosts+=" --add-host=$(echo $i)"; done
62+
;;
63+
ctr) version_check_result=$($container_command images list | grep "${spark_image_full}" 2> /dev/null)
64+
# hosts update
65+
host_storage=""
66+
count=01
67+
localname=
68+
for i in $workers ; do
69+
n=spark-worker-$(printf "%02d" ${count})
70+
((count++))
71+
if echo "$local_ips" |grep -Fqw "$i" ; then n="${n} `hostname -s`" ; fi
72+
test "$(grep -qw "$n" /etc/hosts ; echo $?)" == 0 && continue
73+
host_storage+="$i $n\n"
74+
done
75+
for i in $extrahosts ; do
76+
test "$(grep -qw "$i" /etc/hosts ; echo $?)" == 0 && continue
77+
host_storage+="$(echo $i | sed 's#:# #') # Part of Spark extrahosts $i\n"
78+
done
79+
test -n "$host_storage" && echo -e "# Host file updated for Spark\n$host_storage" >> /etc/hosts
80+
81+
# Manage when a role has changed in the same node
82+
# spark-master should be known on all nodes
83+
if ! grep -q "$master spark-master # set-by-spark_run.sh" /etc/hosts;
84+
then
85+
grep -q "spark-master # set-by-spark_run.sh" /etc/hosts && \
86+
sed -i '/spark-master # set-by-spark_run.sh/d' /etc/hosts
87+
echo "$master spark-master # set-by-spark_run.sh" >> /etc/hosts
88+
fi
89+
;;
5190
esac
5291

5392
if [ -n "${version_check_result}" ];then
@@ -72,10 +111,12 @@ in
72111
case $container_command
73112
in
74113
docker)
114+
75115
$container_command run --rm --net=host --name=EXEC \
76116
-v "${appsdir}:/opt/spark/apps:rw" \
77117
-v "${datadir}:/opt/spark/tmp:rw" \
78118
-v "${logsdir}:/opt/spark/spark-events:rw" \
119+
${add_hosts} \
79120
"${spark_image_full}" \
80121
exec "$@"
81122
;;
@@ -96,6 +137,7 @@ in
96137
-v "${logsdir}:/opt/spark/spark-events:rw" \
97138
--add-host="spark-master:${master}" \
98139
--add-host="$(hostname -s):$(echo ${workers} | awk '{print $1}')" \
140+
${add_hosts} \
99141
--workdir=/opt/spark/apps \
100142
"${spark_image_full}" \
101143
driver
@@ -119,7 +161,7 @@ in
119161
docker) echo "Running $container_command"
120162

121163
# hosts update
122-
host_storage=""
164+
host_storage="${add_hosts}"
123165
count=01
124166
for i in $workers ; do host_storage+=" --add-host=spark-worker-$(printf "%02d" ${count}):$i"; ((count++)) ; done
125167

@@ -153,27 +195,6 @@ in
153195
fi
154196
;;
155197
ctr) echo "Running $container_command"
156-
# hosts update
157-
host_storage=""
158-
count=01
159-
localname=
160-
for i in $workers ; do
161-
n=spark-worker-$(printf "%02d" ${count})
162-
((count++))
163-
if echo "$local_ips" |grep -Fqw "$i" ; then n="${n} `hostname -s`" ; fi
164-
test "$(grep -qw "$n" /etc/hosts ; echo $?)" == 0 && continue
165-
host_storage+="$i $n\n"
166-
done
167-
test -n "$host_storage" && echo -e "# Host file updated for Spark\n$host_storage" >> /etc/hosts
168-
169-
# Manage when a role has changed in the same node
170-
# spark-master should be known on all nodes
171-
if ! grep -q "$master spark-master # set-by-spark_run.sh" /etc/hosts;
172-
then
173-
grep -q "spark-master # set-by-spark_run.sh" /etc/hosts && \
174-
sed -i '/spark-master # set-by-spark_run.sh/d' /etc/hosts
175-
echo "$master spark-master # set-by-spark_run.sh" >> /etc/hosts
176-
fi
177198

178199
if [ -n "$local_master" ] ; then
179200
echo "Running master here"

0 commit comments

Comments (0)