@@ -13,16 +13,51 @@ sleep 20; while pgrep rclone > /dev/null; do sleep 1; done
1313source /opt/task/credentials
1414(systemctl is-system-running | grep stopping) || leo stop --cloud="$TPI_TASK_CLOUD_PROVIDER" --region="$TPI_TASK_CLOUD_REGION" "$TPI_TASK_IDENTIFIER";
1515END
16-
1716chmod u=rwx,g=rx,o=rx /usr/bin/tpi-task-shutdown
1817
18+ sudo tee /usr/bin/tpi-task-studio-log << 'END '
#!/bin/bash
# tpi-task-studio-log: POST this task's status to the Studio live endpoint.
#
# Usage: tpi-task-studio-log [STATUS]
#
# When STATUS is given it is reported as-is. When it is omitted the status is
# derived from the environment:
#   queued     `systemctl is-system-running` reports "stopping"
#   timeout    SERVICE_RESULT is "timeout"
#   succeeded  EXIT_STATUS is 0
#   failed     anything else
#
# Environment read:
#   STUDIO_TOKEN          nothing is sent when empty or unset
#   STUDIO_URL            endpoint (default https://studio.iterative.ai/api/live)
#   STUDIO_STEP           step number (default: current epoch seconds)
#   STUDIO_REPO_URL, STUDIO_BASELINE_SHA
#   TPI_TASK_IDENTIFIER, TPI_TASK_CLOUD_PROVIDER, TPI_TASK_DATE_START
#   TPI_MACHINE, TPI_REGION, TPI_DISK_SIZE
#   SERVICE_RESULT, EXIT_STATUS
# NOTE(review): confirm TPI_MACHINE, TPI_REGION and TPI_DISK_SIZE are actually
# exported to this script's environment; they are not set anywhere in view.
#
# Exit status: 0 when no token is configured, otherwise curl's exit status.

URL="${STUDIO_URL:-https://studio.iterative.ai/api/live}"
STEP="${STUDIO_STEP:-$(date +%s)}"
STATUS=$1
DATE_START="${TPI_TASK_DATE_START:-0}"
DATE_END=0

# Print $1 with backslashes and double quotes escaped for use inside a JSON
# string literal.
studio_json_escape() {
  local value=$1
  local backslash='\'
  local quote='"'
  # Backslashes first, so the ones added for quotes are not doubled again.
  value=${value//"$backslash"/"$backslash$backslash"}
  value=${value//"$quote"/"$backslash$quote"}
  printf '%s' "$value"
}

# Print the status to report when none was passed on the command line.
studio_detect_status() {
  # Variables are quoted so that empty/unset values compare as "" instead of
  # making the test itself fail with a syntax error.
  if systemctl is-system-running | grep -q stopping; then
    printf 'queued'
  elif [[ "$SERVICE_RESULT" == timeout ]]; then
    printf 'timeout'
  elif [[ "$EXIT_STATUS" == 0 ]]; then
    printf 'succeeded'
  else
    printf 'failed'
  fi
}

# Print the JSON request body built from the current STATUS/STEP/DATE_* values.
# STEP, DATE_START and DATE_END are emitted as bare JSON numbers.
studio_build_payload() {
  local params
  params=$(printf '{"task": {"id": "%s", "status": "%s", "cloud": "%s", "machine": "%s", "region": "%s", "diskSize": "%s", "dateStart": %s, "dateEnd": %s}}' \
    "$(studio_json_escape "$TPI_TASK_IDENTIFIER")" \
    "$(studio_json_escape "$STATUS")" \
    "$(studio_json_escape "$TPI_TASK_CLOUD_PROVIDER")" \
    "$(studio_json_escape "$TPI_MACHINE")" \
    "$(studio_json_escape "$TPI_REGION")" \
    "$(studio_json_escape "$TPI_DISK_SIZE")" \
    "$DATE_START" \
    "$DATE_END")
  printf '{"type": "data", "client": "dvclive", "repo_url": "%s", "baseline_sha": "%s", "name": "TPI_TASK:%s", "step":%s, "params": %s}' \
    "$(studio_json_escape "$STUDIO_REPO_URL")" \
    "$(studio_json_escape "$STUDIO_BASELINE_SHA")" \
    "$(studio_json_escape "$TPI_TASK_IDENTIFIER")" \
    "$STEP" \
    "$params"
}

# Resolve the status, stamp the end date for terminal states and send the
# report. Does nothing (and succeeds) when STUDIO_TOKEN is empty.
main() {
  [[ -n "$STUDIO_TOKEN" ]] || return 0

  if [[ -z "$STATUS" ]]; then
    STATUS=$(studio_detect_status)
  fi

  # Only terminal states carry an end timestamp; others keep dateEnd at 0.
  if [[ "$STATUS" =~ ^(timeout|succeeded|failed)$ ]]; then
    DATE_END=$(date +%s)
  fi

  curl -X POST "$URL" \
    -H "Content-Type: application/json" \
    -H "Authorization: token ${STUDIO_TOKEN}" \
    -d "$(studio_build_payload)"
}

main "$@"
50+ END
51+ chmod u=rwx,g=rx,o=rx /usr/bin/tpi-task-studio-log
52+
# Decode the base64 text in the here-document and write it to
# /opt/task/variables through sudo tee; tee's echo on stdout is discarded.
1953base64 --decode << END | sudo tee /opt/task/variables > /dev/null
2054S0VZPSJWQUxVRSIK
2155END
# Restrict /opt/task/variables to owner read/write; group and others get nothing.
56+ chmod u=rw,g=,o= /opt/task/variables
57+
# Decode the base64 text in the here-document and write it to
# /opt/task/credentials through sudo tee; tee's echo on stdout is discarded.
2258base64 --decode << END | sudo tee /opt/task/credentials > /dev/null
2359ZXhwb3J0IFNFQ1JFVD1WQUxVRQo=
2460END
25- chmod u=rw,g=,o= /opt/task/variables
# Restrict /opt/task/credentials to owner read/write; group and others get nothing.
2661chmod u=rw,g=,o= /opt/task/credentials
2762
2863while IFS= read -rd $' \0' variable; do
@@ -48,7 +83,7 @@ sudo tee /etc/systemd/system/tpi-task.service > /dev/null <<END
4883[Service]
4984 Type=simple
5085 ExecStart=-$TPI_START_COMMAND
51- ExecStop=/bin/bash -c 'source /opt/task/credentials; systemctl is-system-running | grep stopping || echo "{\\\\ "result\\\\ ": \\\\ "\$ SERVICE_RESULT\\\\ ", \\\\ "code\\\\ ": \\\\ "\$ EXIT_STATUS\\\\ ", \\\\ "status\\\\ ": \\\\ "\$ EXIT_CODE\\\\ "}" > "$TPI_LOG_DIRECTORY /status-$TPI_MACHINE_IDENTITY " && RCLONE_CONFIG= rclone copy "$TPI_LOG_DIRECTORY " "\$ RCLONE_REMOTE/reports"'
86+ ExecStop=/bin/bash -c 'source /opt/task/credentials; /usr/bin/tpi-task-studio-log && systemctl is-system-running | grep stopping || echo "{\\\\ "result\\\\ ": \\\\ "\$ SERVICE_RESULT\\\\ ", \\\\ "code\\\\ ": \\\\ "\$ EXIT_STATUS\\\\ ", \\\\ "status\\\\ ": \\\\ "\$ EXIT_CODE\\\\ "}" > "$TPI_LOG_DIRECTORY /status-$TPI_MACHINE_IDENTITY " && RCLONE_CONFIG= rclone copy "$TPI_LOG_DIRECTORY " "\$ RCLONE_REMOTE/reports"'
5287 ExecStopPost=/usr/bin/tpi-task-shutdown
5388 Environment=HOME=/root
5489 EnvironmentFile=/opt/task/variables
@@ -101,6 +136,8 @@ if test -f /etc/apt/sources.list.d/cuda.list; then
101136 for list in cuda nvidia-ml; do mv /etc/apt/sources.list.d/$list .list{.backup,}; done
102137fi
103138
# Invoke the studio-log helper with the explicit status "running"; this happens
# before the tpi-task service is enabled and started below.
139+ /usr/bin/tpi-task-studio-log running
140+
# Make systemd re-read unit files, then enable tpi-task.service and start it
# immediately (--now).
104141sudo systemctl daemon-reload
105142sudo systemctl enable tpi-task.service --now
# Disable apt-daily.timer and stop it immediately (--now).
106143sudo systemctl disable --now apt-daily.timer
0 commit comments