From 19c93259812c6397d899d757f4e3cd39ffefbd1a Mon Sep 17 00:00:00 2001 From: Alejandro Gil Date: Wed, 29 Apr 2026 14:45:33 -0700 Subject: [PATCH 1/2] fix(docker): serialize build stages and bump SSH timeout for low-mem EC2 The cold build on a t3.medium production host locked the box hard enough that SSH and SSM both became unresponsive. Root cause: BuildKit runs independent stages in parallel by default, so cargo release build (-j 2, ~3 GB RSS) and bun/vite build (NODE_OPTIONS 2 GB) ran simultaneously on a 4 GB instance. Even with 4 GB swap the system thrashed into a kernel lockup. Two changes: 1. Add a no-op `COPY --from=rust-builder /artifacts/keycast /tmp/.rust-builder-done` as the first instruction of web-builder. BuildKit sees the cross-stage dependency and only starts web-builder once rust-builder finishes, so cargo and bun never run concurrently. 2. Bump appleboy/ssh-action `command_timeout` from 30m to 60m across all four deploy/QA steps. A cold cargo + bun build on t3.medium with -j 2 takes ~45-55 min; the previous 30m killed the SSH session mid-build. Once cache mounts are populated by a successful cold build, warm builds return to a few minutes and stay well under the new timeout. Co-Authored-By: Claude Opus 4.7 --- .github/workflows/build-test-push-synvya.yaml | 8 ++++---- Dockerfile | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-test-push-synvya.yaml b/.github/workflows/build-test-push-synvya.yaml index cdd0721f..5cc82c0d 100644 --- a/.github/workflows/build-test-push-synvya.yaml +++ b/.github/workflows/build-test-push-synvya.yaml @@ -134,7 +134,7 @@ jobs: host: ${{ secrets.EC2_STAGING_HOST }} username: ec2-user key: ${{ secrets.EC2_STAGING_SSH_KEY }} - command_timeout: 30m + command_timeout: 60m script: | set -euo pipefail cd /opt/synvya/keycast @@ -180,7 +180,7 @@ jobs: host: ${{ secrets.EC2_PRODUCTION_HOST }} username: ec2-user key: ${{ secrets.EC2_PRODUCTION_SSH_KEY }} - command_timeout: 30m + command_timeout: 60m script: | set -euo pipefail cd /opt/synvya/keycast @@ -226,7 +226,7 @@ jobs: host: ${{ secrets.EC2_STAGING_HOST }} username: ec2-user key: ${{ secrets.EC2_STAGING_SSH_KEY }} - command_timeout: 30m + command_timeout: 60m script: | set -e @@ -307,7 +307,7 @@ jobs: host: ${{ secrets.EC2_PRODUCTION_HOST }} username: ec2-user key: ${{ secrets.EC2_PRODUCTION_SSH_KEY }} - command_timeout: 30m + command_timeout: 60m script: | set -e diff --git a/Dockerfile b/Dockerfile index 066778ff..90a72d42 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,6 +44,12 @@ RUN --mount=type=cache,target=/usr/local/cargo/registry,sharing=locked \ # Build stage for Bun frontend FROM oven/bun:1 AS web-builder +# Force serial execution: only start web-builder after rust-builder +# completes. BuildKit otherwise runs the two stages in parallel, which +# pushes a small EC2 (e.g. t3.medium) into swap thrash and locks the +# host. This COPY creates a build-graph dependency on rust-builder. +COPY --from=rust-builder /artifacts/keycast /tmp/.rust-builder-done + # Install build essentials for native modules RUN apt-get update && apt-get install -y \ python3 \ From f756f50b1617d384a20d16bca5486c3d4a74bc2b Mon Sep 17 00:00:00 2001 From: Alejandro Gil Date: Wed, 29 Apr 2026 15:19:06 -0700 Subject: [PATCH 2/2] fix(prep): write cargo config to invoking user's home, not /root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ec2-prepare-host.sh is invoked under sudo (e.g. while debugging on the host), $HOME resolves to /root and the cargo [build] jobs=2 config lands in /root/.cargo/config.toml. The deploy/QA workflow runs the script over SSH as ec2-user, so it never reads that config — defeating the limit. Detect SUDO_USER and write to that user's home instead, then chown the .cargo/ tree back to them so cargo can read/write it when running under their UID. Co-Authored-By: Claude Opus 4.7 --- scripts/ec2-prepare-host.sh | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/scripts/ec2-prepare-host.sh b/scripts/ec2-prepare-host.sh index 43962ecf..d863ba70 100755 --- a/scripts/ec2-prepare-host.sh +++ b/scripts/ec2-prepare-host.sh @@ -8,6 +8,17 @@ SWAPFILE="${SWAPFILE:-/swapfile}" SWAP_SIZE="${SWAP_SIZE:-4G}" CARGO_JOBS="${CARGO_JOBS:-2}" +# When invoked under sudo, $HOME points to /root. We want the cargo +# config to land in the invoking user's home so the workflow (which +# runs as ec2-user over SSH) reads the same file. +if [ -n "${SUDO_USER:-}" ] && [ "${SUDO_USER}" != "root" ]; then + TARGET_HOME="$(getent passwd "${SUDO_USER}" | cut -d: -f6)" + TARGET_USER="${SUDO_USER}" +else + TARGET_HOME="${HOME}" + TARGET_USER="$(id -un)" +fi + echo "=== ec2-prepare-host: ensure swap (${SWAP_SIZE} at ${SWAPFILE}) ===" if swapon --show=NAME --noheadings | grep -qx "${SWAPFILE}"; then echo "swap already active at ${SWAPFILE}" @@ -25,9 +36,9 @@ else echo "swap enabled at ${SWAPFILE}" fi -echo "=== ec2-prepare-host: ensure ~/.cargo/config.toml jobs=${CARGO_JOBS} ===" -mkdir -p "${HOME}/.cargo" -CARGO_CFG="${HOME}/.cargo/config.toml" +echo "=== ec2-prepare-host: ensure ${TARGET_HOME}/.cargo/config.toml jobs=${CARGO_JOBS} ===" +mkdir -p "${TARGET_HOME}/.cargo" +CARGO_CFG="${TARGET_HOME}/.cargo/config.toml" if [ ! -f "${CARGO_CFG}" ] || ! grep -qE '^\[build\]' "${CARGO_CFG}"; then cat >> "${CARGO_CFG}" <