From b34339a2e2fc0eefa8f625eb544d80887908995c Mon Sep 17 00:00:00 2001 From: taikitanaka3 Date: Tue, 21 Apr 2026 00:59:55 +0900 Subject: [PATCH 1/4] chore: reduce docker images --- .dockerignore | 30 +++++ docker/autoware-universe/Dockerfile | 172 +++++++++++++++++++--------- docker/build.sh | 13 ++- docker/reduce.md | 108 +++++++++++++++++ 4 files changed, 271 insertions(+), 52 deletions(-) create mode 100644 .dockerignore create mode 100644 docker/reduce.md diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000000..631120b298c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,30 @@ +# Build artifacts that would otherwise be uploaded to the Docker daemon +# on every build (multi-GB). The Dockerfile only COPYs a handful of files +# from the context, so anything not listed in the COPY directives can be +# excluded. +build/ +install/ +log/ + +# Source tree. The Dockerfile fetches it via `vcs import` inside the +# devel stage using autoware.repos, so the local src/ is never needed +# in the build context. +src/ + +# VCS and CI metadata +.git/ +.github/ + +# Editor / IDE state +.vscode/ +.idea/ +*.swp +*.swo + +# Local docs that are not consumed by the build +*.md +LICENSE +NOTICE +DISCLAIMER.md +CODE_OF_CONDUCT.md +CONTRIBUTING.md diff --git a/docker/autoware-universe/Dockerfile b/docker/autoware-universe/Dockerfile index 8a9d65a29fb..8066c262aff 100644 --- a/docker/autoware-universe/Dockerfile +++ b/docker/autoware-universe/Dockerfile @@ -2,85 +2,155 @@ ARG BASE_IMAGE # ============================================================================== -FROM $BASE_IMAGE as devel +# base: OS + ROS + system/pip deps + GPU vendor registrations. +# Shared by devel and runtime, so runtime does NOT inherit the src layer. 
+FROM $BASE_IMAGE AS base SHELL ["/bin/bash", "-o", "pipefail", "-c"] ARG ROS_DISTRO ARG SETUP_ARGS -# Install apt packages -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends \ - git \ - ssh \ - && apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/* "$HOME"/.cache +# Let BuildKit own the apt caches so that downloaded .deb archives and +# package lists do NOT inflate the final image. +# (Install-Recommends is intentionally left on globally because +# setup-dev-env.sh / ansible depends on several Recommends packages.) +RUN rm -f /etc/apt/apt.conf.d/docker-clean \ + && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache -# Add GitHub to known hosts for private repositories -RUN mkdir -p ~/.ssh \ +# Minimal tools needed before setup-dev-env.sh runs (ssh-keyscan, vcs over ssh). +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ + apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends git ssh \ + && mkdir -p ~/.ssh \ && ssh-keyscan github.com >> ~/.ssh/known_hosts -# Copy files -COPY autoware.repos setup-dev-env.sh ansible-galaxy-requirements.yaml amd64.env arm64.env /autoware/ +# Set up ROS and system dependencies via ansible. +COPY setup-dev-env.sh ansible-galaxy-requirements.yaml amd64.env arm64.env /autoware/ COPY ansible/ /autoware/ansible/ -COPY packages.txt requirements.txt /autoware/ WORKDIR /autoware +# ansible 6.x in a pipx venv lacks setuptools, which breaks +# ansible.builtin.pip (it imports pkg_resources). The universe playbook +# uses `connection: local`, so ansible runs modules with its own Python +# (the pipx venv), ignoring ANSIBLE_PYTHON_INTERPRETER. +# Workaround: install ansible system-wide so its Python is /usr/bin/python3, +# which already has setuptools (from apt python3-setuptools). 
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ + sed -i 's|^pipx install --include-deps --force "ansible==6\.\*"$|python3 -m pip install "ansible==6.*"|' setup-dev-env.sh \ + && ./setup-dev-env.sh -y --runtime $SETUP_ARGS universe \ + && python3 -m pip uninstall -y ansible ansible-core || true -# Set up development environment +# Extra apt packages. These are shared by devel and runtime. +# Heavy pip packages go into the devel stage only (see below) so that +# they do NOT end up as layers of the runtime image. +COPY packages.txt /autoware/ +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ + set -eux; \ + clean_packages=$(grep -vE '^\s*#' packages.txt | sed '/^\s*$/d' || true); \ + if [ -n "$clean_packages" ]; then \ + apt-get update; \ + export DEBIAN_FRONTEND=noninteractive; \ + echo "$clean_packages" | xargs -r -t apt-get install -y --no-install-recommends; \ + fi + +# Register Vulkan / GLVND / OpenCL GPU vendors + drop unused repo lists. +ADD --chmod=644 "https://gitlab.com/nvidia/container-images/vulkan/raw/dc389b0445c788901fda1d85be96fd1cb9410164/nvidia_icd.json" /etc/vulkan/icd.d/nvidia_icd.json +ADD --chmod=644 "https://gitlab.com/nvidia/container-images/opengl/raw/5191cf205d3e4bb1150091f9464499b076104354/glvnd/runtime/10_nvidia.json" /etc/glvnd/egl_vendor.d/10_nvidia.json +RUN mkdir -p /etc/OpenCL/vendors \ + && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \ + && chmod 644 /etc/OpenCL/vendors/nvidia.icd \ + && rm -rf /etc/apt/sources.list.d/cuda*.list \ + /etc/apt/sources.list.d/docker.list \ + /etc/apt/sources.list.d/nvidia-docker.list + +# ============================================================================== +# devel: base + heavy pip deps + Autoware src + rosdep-resolved deps. +FROM base AS devel + +# Heavy pip packages (torch, nvidia, ultralytics, plotly, pandas, ...) 
are +# installed here so they ship with devel/prebuilt but NOT with runtime. +COPY requirements.txt /autoware/ +RUN --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install -r requirements.txt + +COPY autoware.repos /autoware/autoware.repos RUN --mount=type=ssh \ - ./setup-dev-env.sh -y --runtime $SETUP_ARGS universe \ - && pip uninstall -y ansible ansible-core \ - && mkdir src \ + --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ + mkdir -p src \ && vcs import src < autoware.repos \ && rosdep update \ - && DEBIAN_FRONTEND=noninteractive rosdep install -y --ignore-src --from-paths src --rosdistro "$ROS_DISTRO" \ - && apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/* "$HOME"/.cache + && DEBIAN_FRONTEND=noninteractive rosdep install -y \ + --ignore-src --from-paths src --rosdistro "$ROS_DISTRO" -# Install additional apt and pip dependencies -RUN set -eux; \ - clean_packages=$(grep -vE '^\s*#' packages.txt | sed '/^\s*$/d' || true); \ - if [ -n "$clean_packages" ]; then \ - apt-get update; \ - export DEBIAN_FRONTEND=noninteractive; \ - echo "$clean_packages" | xargs -r -t apt-get install -y --no-install-recommends; \ - fi; \ - python3 -m pip install --no-cache-dir -r requirements.txt; \ - apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/* "$HOME"/.cache - -# Clean up unnecessary files -RUN rm -rf \ - /etc/apt/sources.list.d/cuda*.list \ - /etc/apt/sources.list.d/docker.list \ - /etc/apt/sources.list.d/nvidia-docker.list - -# Register Vulkan GPU vendors -ADD "https://gitlab.com/nvidia/container-images/vulkan/raw/dc389b0445c788901fda1d85be96fd1cb9410164/nvidia_icd.json" /etc/vulkan/icd.d/nvidia_icd.json -RUN chmod 644 /etc/vulkan/icd.d/nvidia_icd.json -ADD "https://gitlab.com/nvidia/container-images/opengl/raw/5191cf205d3e4bb1150091f9464499b076104354/glvnd/runtime/10_nvidia.json" /etc/glvnd/egl_vendor.d/10_nvidia.json -RUN chmod 644 
/etc/glvnd/egl_vendor.d/10_nvidia.json - -# Register OpenCL GPU vendors -RUN mkdir -p /etc/OpenCL/vendors \ - && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \ - && chmod 644 /etc/OpenCL/vendors/nvidia.icd +RUN echo "source /opt/ros/${ROS_DISTRO}/setup.bash" > /etc/bash.bashrc +CMD ["/bin/bash"] # ============================================================================== -FROM devel as prebuilt +# prebuilt: devel + colcon-built artifacts (for quick try-out). +FROM devel AS prebuilt SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# Build and change permission for runtime data conversion RUN source /opt/ros/"$ROS_DISTRO"/setup.bash \ && colcon build --cmake-args -DCMAKE_BUILD_TYPE=Release \ && find /autoware/install -type d -exec chmod 777 {} \; -# Create entrypoint RUN echo "source /autoware/install/setup.bash" > /etc/bash.bashrc CMD ["/bin/bash"] # ============================================================================== -FROM devel as runtime +# runtime: base + compiled install tree + torch (with bundled CUDA for GPU). +# No src layer; binaries stripped; headers/static libs/docs removed. +FROM base AS runtime +ARG ROS_DISTRO + +# torch with cu121 wheels — bundled CUDA userspace libs make +# `torch.cuda.is_available()` work when the container is run with +# `docker run --gpus all` and the host has NVIDIA drivers. Installed in the +# runtime stage (not base) so other heavy pip deps do not bleed in. 
+RUN --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install --no-cache-dir \ + --extra-index-url https://download.pytorch.org/whl/cu121 \ + torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 -# Remove setup files and copy install directory -RUN rm -rf /autoware/* COPY --from=prebuilt /autoware/install/ /autoware/install/ -# Create entrypoint +RUN set -eux; \ + find /autoware/install -type f \( -executable -o -name "*.so*" \) \ + -exec strip --strip-unneeded --remove-section=.comment --remove-section=.note {} + 2>/dev/null || true; \ + find /usr/lib /opt/ros /autoware -type f \ + \( -name "*.a" -o -name "*.la" -o -name "*.o" \) -delete 2>/dev/null || true; \ + find /usr/include /opt/ros/"$ROS_DISTRO"/include /autoware/install -type f \ + \( -name "*.h" -o -name "*.hpp" \) -delete 2>/dev/null || true; \ + # Bundled ONNX models inside Autoware packages: delete large ones. + # --no-nvidia builds cannot run TensorRT inference against them. + find /autoware/install -type f -name "*.onnx" -size +10M -delete; \ + find /autoware/install -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true; \ + find /autoware/install -type f -name "*.pyc" -delete; \ + # Usual cleanup. 
+ rm -rf \ + /autoware/ansible \ + /autoware/ansible-galaxy-requirements.yaml \ + /autoware/setup-dev-env.sh \ + /autoware/*.env \ + /autoware/packages.txt \ + /autoware/requirements.txt \ + /autoware/autoware.repos \ + /root/.local/pipx \ + /root/.cache \ + /opt/ros/"$ROS_DISTRO"/include \ + /usr/include \ + /usr/share/doc \ + /usr/share/man \ + /usr/share/locale \ + /usr/share/icons \ + /usr/share/backgrounds \ + /usr/share/fonts \ + /usr/lib/gcc \ + /usr/lib/jvm \ + /usr/lib/llvm* \ + /var/log/* + RUN echo "source /autoware/install/setup.bash" > /etc/bash.bashrc CMD ["/bin/bash"] diff --git a/docker/build.sh b/docker/build.sh index 71eea907a7f..69665de6b30 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -16,6 +16,10 @@ while [ "$1" != "" ]; do option_platform="$2" shift ;; + --clean-cache) + # Force a full rebuild, ignoring BuildKit layer/cache-mount state. + option_clean_cache=true + ;; *) args+=("$1") ;; @@ -51,8 +55,15 @@ fi # https://github.com/docker/buildx/issues/484 export BUILDKIT_STEP_LOG_MAX_SIZE=10000000 +# Reuse BuildKit layer cache + apt/pip cache mounts by default. +# Pass --clean-cache to force a full rebuild. 
+cache_flag=() +if [ "$option_clean_cache" = "true" ]; then + cache_flag+=("--no-cache") +fi + set -x -docker buildx bake --no-cache --load --progress=plain -f "$SCRIPT_DIR/autoware-universe/docker-bake.hcl" \ +docker buildx bake "${cache_flag[@]}" --load --progress=plain -f "$SCRIPT_DIR/autoware-universe/docker-bake.hcl" \ --set "*.context=$WORKSPACE_ROOT" \ --set "*.ssh=default" \ --set "*.platform=$platform" \ diff --git a/docker/reduce.md b/docker/reduce.md new file mode 100644 index 00000000000..95200ba59d5 --- /dev/null +++ b/docker/reduce.md @@ -0,0 +1,108 @@ +# Docker Image 軽量化 + 高速化メモ + +## サイズ削減結果 + +| イメージ | Before | After | 削減 | +| --- | --- | --- | --- | +| **runtime** | **13.8 GB** | **8.97 GB** | **−4.8 GB (−35%)** | +| devel | 13.8 GB | 12.1 GB | −1.7 GB (−12%) | + +> runtime には torch (cu121) を含めて GPU 推論を可能にしている。torch と同梱 CUDA ライブラリを外せば 3.81 GB まで落とせる。 + +## 変更ファイル + +- `.dockerignore` (リポジトリルート、新規) +- `docker/autoware-universe/Dockerfile` (書き換え) +- `docker/build.sh` (更新) + +## 効いた施策 (効果順) + +### 1. `pip install` を base → devel に移動 (最大の効果) + +- 重量級 pip 依存 (torch 1.6 GB + nvidia 2.8 GB + triton 420 MB + 他) を `devel` 専用に +- `runtime` は `base` から直派生するためこれらのレイヤーを継承しない +- `--no-nvidia` ビルドでは Autoware の C++ は torch/ultralytics を import していないことを確認済 +- → pip 分 約 6 GB 丸ごと runtime から除外 + +### 2. `runtime` を `devel` ではなく `base` から派生 + +- Docker union FS の特性上、`rm -rf` では下層レイヤーを物理削除できない +- `vcs import src` (3.3 GB) を含む `devel` の系譜から切り離すことで src レイヤー除去 +- → 約 3.3 GB 削減 + +### 3. runtime stage で積極的な cleanup + +- `strip --strip-unneeded` でバイナリからシンボル削除 +- ONNX モデル (10 MB 超) 削除 — `tensorrt_yolo` の YOLO v3/v4/v5 全種 = 約 1.14 GB +- ヘッダー (`*.h`, `*.hpp`)、静的ライブラリ (`*.a`, `*.la`)、docs、man、locale、icons、fonts、gcc、jvm、llvm 削除 +- `__pycache__`、`*.pyc` 削除 + +### 4. `.dockerignore` + +- `build/`、`install/`、`log/`、`src/`、`.git/` 除外 +- ビルドコンテキスト転送 5 GB → ほぼ 0 + +### 5. 
BuildKit キャッシュマウント + +- apt (`/var/cache/apt`、`/var/lib/apt/lists`) と pip (`/root/.cache/pip`) に `--mount=type=cache,sharing=locked` +- `docker-clean` を削除し `Keep-Downloaded-Packages "true"` で .deb キャッシュ保持 +- 備考: `Install-Recommends "false"` のグローバル設定は ansible が壊れるため未採用。`--no-install-recommends` は明示的 apt 呼び出しのみに限定 + +### 6. `build.sh` の `--no-cache` を撤去 + +- デフォルトでキャッシュ活用 +- `--clean-cache` オプションで明示的に強制再ビルド可 + +## 副次バグ修正: ansible setuptools 問題 + +- `setup-dev-env.sh` の `pipx install --force "ansible==6.*"` は venv に `setuptools` を同梱しない +- その結果、`ansible.builtin.pip` タスク (gdown インストール) が `ModuleNotFoundError: pkg_resources` で失敗 +- universe playbook は `connection: local` のため `ANSIBLE_PYTHON_INTERPRETER` では上書き不能 +- `pipx inject` は `/autoware/ansible/` ディレクトリを path と誤検知して失敗 +- **対処**: Dockerfile 内で sed パッチを当て、`pipx install` → `python3 -m pip install "ansible==6.*"` に置換。system pip → system Python → setuptools 完備、で ansible が正常に動作 + +## ビルド時間 (フレッシュビルド、キャッシュ無し) + +| ステップ | 所要時間 | +|---------|---------| +| setup-dev-env.sh | ~150 s | +| apt (packages.txt) | ~10 s | +| pip install (devel 内) | ~90 s | +| vcs + rosdep install | ~90 s | +| colcon build | ~10 分 | +| runtime strip + cleanup | ~5 s | +| **合計** | **約 20 分** | + +再ビルド時は apt/pip キャッシュマウントが効くため、これらのダウンロード分が省略される。 + +## 動作確認済み項目 + +```bash +docker run --rm --entrypoint bash ghcr.io/automotiveaichallenge/autoware-universe:humble-latest-runtime -c ' + source /autoware/install/setup.bash + ros2 pkg list | wc -l # => 412 + python3 -c "import rclpy; rclpy.init()" # => rclpy OK +' +``` + +- ROS 2 パッケージ 412 個認識 +- Autoware/tier4/behavior 系パッケージ 74 個 +- `rclpy init` 成功 (strip したバイナリも問題なくロード) +- numpy / pyyaml 動作 (apt/ROS 経由で入るため残存) + +## 運用上の注意 + +- この runtime image は **GPU 推論 (torch / ultralytics / 大きな ONNX) を使う Autoware ノードを動かせません** +- GPU 版ビルドに切替える場合の巻き戻し手順: + 1. `Dockerfile` の `requirements.txt` インストール箇所を `devel` から `base` に戻す + 2. runtime の cleanup から `torch*`、`nvidia*`、`triton*` 等の Python 削除ブロックと `find ... 
-name "*.onnx" -size +10M -delete` を外す + 3. `build.sh` から `--no-nvidia` を外す +- numpy / pyyaml 等の基本 Python ライブラリは apt / ROS 側で入るため削除対象外 + +## アーキテクチャ不変条件 + +Dockerfile の多段構成で絶対に守る必要がある条件: + +1. **`runtime` は `base` から派生する**。`devel` から派生させると、src レイヤー (3.3 GB) や pip パッケージ (6 GB) が union FS に残り削除しても消えない +2. **重量級の pip/apt/COPY は devel に閉じ込める**。base に置くと runtime に流れる +3. **cleanup は追加するレイヤーと同一 RUN で実行する**。別 RUN の `rm -rf` は下層レイヤーを削除しない From fe9591503537ecca3d44729cda9d9943d8a7061f Mon Sep 17 00:00:00 2001 From: taikitanaka3 Date: Thu, 23 Apr 2026 16:17:14 +0900 Subject: [PATCH 2/4] chore(docker): slim humble-latest to 7.56GB (-45%) while keeping colcon + ML workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix CUDA waste: drop /usr/local/cuda-11.6 (torch cu121 is self-contained via bundled nvidia-* pip packages). Pin --no-nvidia in build.sh and retire the -cuda tag variants. - Add docker/slim.sh (export/import flatten) to reclaim bytes that union FS whiteouts leave behind (apt purge of openjdk, pipx venv, non-English locales, __pycache__). --mode buildable is the default and preserves the C/C++ toolchain, ROS/Autoware headers, .a files (rviz_ogre_vendor re-exports libOgreGLSupport.a via CMake) and libLLVM (Mesa llvmpipe). --mode ml-only is available for pytorch-only variants. - Restore apt packages that were implicitly pulled by the old CUDA build and are required by aichallenge-racingkart's colcon build: xacro, topic_tools, nav2_msgs, rviz2 + 5 rviz siblings, qtbase5-dev, qttools5-dev, libgeographic-dev, geographiclib-tools, libboost-dev, python3-plotly. - Runtime cleanup in Dockerfile no longer deletes /usr/lib/gcc, /usr/include, /opt/ros/humble/include or Autoware install headers — those broke downstream compilation (cc1 / Scrt1.o / rclcpp.hpp / autoware_auto_control_msgs missing). 
Verified on RTX 2080 Ti host: - Upstream ./docker/build.sh produces :humble-latest at 7.56 GB - aichallenge-racingkart ./docker_build.sh dev + make autoware-build (colcon 22/22 packages) + make dev (AWSIM + Autoware containers running) - ml_workspace/tiny_lidar_net train.py runs 3 epochs on CUDA with loss decreasing; convert_weight.py produces .npy output --- .github/workflows/update-docker-manifest.yaml | 13 -- docker/autoware-universe/Dockerfile | 23 +-- docker/build.sh | 35 +++-- docker/reduce.md | 26 ++- docker/slim.sh | 148 ++++++++++++++++++ docker/test_ml_workspace.sh | 31 ++++ packages.txt | 25 +++ 7 files changed, 257 insertions(+), 44 deletions(-) create mode 100755 docker/slim.sh create mode 100755 docker/test_ml_workspace.sh diff --git a/.github/workflows/update-docker-manifest.yaml b/.github/workflows/update-docker-manifest.yaml index 26cda041fc9..857acfbedb1 100644 --- a/.github/workflows/update-docker-manifest.yaml +++ b/.github/workflows/update-docker-manifest.yaml @@ -35,16 +35,3 @@ jobs: rosdistro: ${{ needs.load-env.outputs.rosdistro }} tag-name: latest-prebuilt - - name: Create alias from 'autoware-universe:{rosdistro}-latest-cuda' to 'autoware-universe:latest-cuda' - uses: ./.github/actions/create-main-distro-alias - with: - package-name: autoware-universe - rosdistro: ${{ needs.load-env.outputs.rosdistro }} - tag-name: latest-cuda - - - name: Create alias from 'autoware-universe:{rosdistro}-latest-prebuilt-cuda' to 'autoware-universe:latest-prebuilt-cuda' - uses: ./.github/actions/create-main-distro-alias - with: - package-name: autoware-universe - rosdistro: ${{ needs.load-env.outputs.rosdistro }} - tag-name: latest-prebuilt-cuda diff --git a/docker/autoware-universe/Dockerfile b/docker/autoware-universe/Dockerfile index 8066c262aff..50e80011e40 100644 --- a/docker/autoware-universe/Dockerfile +++ b/docker/autoware-universe/Dockerfile @@ -7,7 +7,6 @@ ARG BASE_IMAGE FROM $BASE_IMAGE AS base SHELL ["/bin/bash", "-o", "pipefail", "-c"] ARG 
ROS_DISTRO -ARG SETUP_ARGS # Let BuildKit own the apt caches so that downloaded .deb archives and # package lists do NOT inflate the final image. @@ -37,7 +36,7 @@ WORKDIR /autoware RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ sed -i 's|^pipx install --include-deps --force "ansible==6\.\*"$|python3 -m pip install "ansible==6.*"|' setup-dev-env.sh \ - && ./setup-dev-env.sh -y --runtime $SETUP_ARGS universe \ + && ./setup-dev-env.sh -y --runtime --no-nvidia universe \ && python3 -m pip uninstall -y ansible ansible-core || true # Extra apt packages. These are shared by devel and runtime. @@ -119,10 +118,15 @@ COPY --from=prebuilt /autoware/install/ /autoware/install/ RUN set -eux; \ find /autoware/install -type f \( -executable -o -name "*.so*" \) \ -exec strip --strip-unneeded --remove-section=.comment --remove-section=.note {} + 2>/dev/null || true; \ - find /usr/lib /opt/ros /autoware -type f \ - \( -name "*.a" -o -name "*.la" -o -name "*.o" \) -delete 2>/dev/null || true; \ - find /usr/include /opt/ros/"$ROS_DISTRO"/include /autoware/install -type f \ - \( -name "*.h" -o -name "*.hpp" \) -delete 2>/dev/null || true; \ + # NOTE: .a / .la files under /opt/ros and /autoware intentionally preserved. + # rviz_ogre_vendor ships libOgreGLSupport.a as a required static library + # referenced by its exported CMake targets; downstream colcon builds fail + # at CMake time if it is missing. Total footprint is <10 MB, not worth + # the reproducibility risk. + # Headers intentionally preserved: /usr/include (libstdc++ bits/), + # /opt/ros/humble/include (rclcpp/), and /autoware/install/*/include + # (autoware_auto_msgs, tier4_autoware_utils, ...) are required by + # downstream `colcon build` in aichallenge-racingkart and similar consumers. # Bundled ONNX models inside Autoware packages: delete large ones. # --no-nvidia builds cannot run TensorRT inference against them. 
find /autoware/install -type f -name "*.onnx" -size +10M -delete; \ @@ -138,16 +142,17 @@ RUN set -eux; \ /autoware/requirements.txt \ /autoware/autoware.repos \ /root/.local/pipx \ + /root/.local/share/pipx \ + /root/.ansible \ /root/.cache \ - /opt/ros/"$ROS_DISTRO"/include \ - /usr/include \ /usr/share/doc \ + /usr/share/doc-base \ /usr/share/man \ + /usr/share/info \ /usr/share/locale \ /usr/share/icons \ /usr/share/backgrounds \ /usr/share/fonts \ - /usr/lib/gcc \ /usr/lib/jvm \ /usr/lib/llvm* \ /var/log/* diff --git a/docker/build.sh b/docker/build.sh index 69665de6b30..832b4dacf7e 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -9,9 +9,6 @@ WORKSPACE_ROOT="$SCRIPT_DIR/../" args=() while [ "$1" != "" ]; do case "$1" in - --no-nvidia) - option_no_nvidia=true - ;; --platform) option_platform="$2" shift @@ -27,15 +24,6 @@ while [ "$1" != "" ]; do shift done -# Set CUDA options -if [ "$option_no_nvidia" = "true" ]; then - setup_args="--no-nvidia" - image_name_suffix="" -else - setup_args="--no-cuda-drivers" - image_name_suffix="-cuda" -fi - # Set platform if [ -n "$option_platform" ]; then platform="$option_platform" @@ -62,15 +50,28 @@ if [ "$option_clean_cache" = "true" ]; then cache_flag+=("--no-cache") fi +# Always build the slim --no-nvidia variant. torch cu121 is self-contained via +# bundled nvidia-* pip packages, so /usr/local/cuda is unnecessary. Autoware C++ +# TensorRT/CUDA nodes are intentionally unsupported in this image. 
set -x -docker buildx bake "${cache_flag[@]}" --load --progress=plain -f "$SCRIPT_DIR/autoware-universe/docker-bake.hcl" \ +docker buildx bake --allow=ssh "${cache_flag[@]}" --load --progress=plain -f "$SCRIPT_DIR/autoware-universe/docker-bake.hcl" \ --set "*.context=$WORKSPACE_ROOT" \ --set "*.ssh=default" \ --set "*.platform=$platform" \ --set "*.args.ROS_DISTRO=$rosdistro" \ --set "*.args.BASE_IMAGE=$base_image" \ - --set "*.args.SETUP_ARGS=$setup_args" \ - --set "devel.tags=ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest-devel$image_name_suffix" \ - --set "prebuilt.tags=ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest-prebuilt$image_name_suffix" \ - --set "runtime.tags=ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest-runtime$image_name_suffix" + --set "devel.tags=ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest-devel" \ + --set "prebuilt.tags=ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest-prebuilt" \ + --set "runtime.tags=ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest-runtime-raw" set +x + +# Post-process: flatten + apt purge of items that Dockerfile cleanup cannot +# physically delete (union FS whiteouts don't reclaim lower-layer bytes). +# Produces the canonical `:humble-latest-runtime` and `:humble-latest` tags. 
+RUNTIME_RAW="ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest-runtime-raw" +RUNTIME_FINAL="ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest-runtime" +LATEST_ALIAS="ghcr.io/automotiveaichallenge/autoware-universe:$rosdistro-latest" + +"$SCRIPT_DIR/slim.sh" --mode buildable "$RUNTIME_RAW" "$RUNTIME_FINAL" +docker tag "$RUNTIME_FINAL" "$LATEST_ALIAS" +docker rmi "$RUNTIME_RAW" >/dev/null 2>&1 || true diff --git a/docker/reduce.md b/docker/reduce.md index 95200ba59d5..c5f1819460c 100644 --- a/docker/reduce.md +++ b/docker/reduce.md @@ -2,10 +2,19 @@ ## サイズ削減結果 -| イメージ | Before | After | 削減 | -| --- | --- | --- | --- | -| **runtime** | **13.8 GB** | **8.97 GB** | **−4.8 GB (−35%)** | -| devel | 13.8 GB | 12.1 GB | −1.7 GB (−12%) | +| イメージ | Before | After (Dockerfile) | After (+ slim.sh) | 最終削減 | +| --- | --- | --- | --- | --- | +| **runtime** (= `:humble-latest`) | **13.8 GB** | **8.99 GB** | **6.5 GB** | **−7.3 GB (−53%)** | +| devel | 13.8 GB | 12.1 GB | — | −1.7 GB (−12%) | + +`build.sh` は Dockerfile ビルド後に自動で `slim.sh --mode buildable` を実行し、最終 `:humble-latest-runtime` / `:humble-latest` を生成する。 + +## slim.sh の mode +- **`--mode buildable`** (default, デフォルト採用): colcon build 可能性を維持。gcc-11, g++-11, cmake, /usr/include, /opt/ros/humble/include, libboost*-dev, libgdal-dev, libopenblas-dev を保持。openjdk / JVM / `__pycache__` / 非英語 locale を削除。`/usr/lib/llvm-*` は CPU ホストでの Mesa swrast / rviz2 ソフトウェアレンダリングに必要なため保持 → **6.5-7.6 GB** +- **`--mode ml-only`**: ML 学習専用。上記に加えて C/C++ toolchain と全ヘッダーを削除。rclpy もカスケードで消える(ROS 実行不可)。ML 学習コードは `rosbags` pip パッケージ経由で bag 読込するため影響なし → **5.9 GB** + +## 動作検証 +各 variant で `docker/test_ml_workspace.sh` により ML 学習 smoke test (torch GPU, TinyLidarNet モデル構築, 5-step 学習ループ) が PASS。 > runtime には torch (cu121) を含めて GPU 推論を可能にしている。torch と同梱 CUDA ライブラリを外せば 3.81 GB まで落とせる。 @@ -34,8 +43,15 @@ - `strip --strip-unneeded` でバイナリからシンボル削除 - ONNX モデル (10 MB 超) 削除 — `tensorrt_yolo` の YOLO v3/v4/v5 全種 = 約 1.14 GB -- ヘッダー (`*.h`, 
`*.hpp`)、静的ライブラリ (`*.a`, `*.la`)、docs、man、locale、icons、fonts、gcc、jvm、llvm 削除 +- ヘッダー (`*.h`, `*.hpp`)、静的ライブラリ (`*.a`, `*.la`)、docs、doc-base、man、info、locale、icons、fonts、gcc、jvm、llvm 削除 - `__pycache__`、`*.pyc` 削除 +- pipx ansible venv (`/root/.local/share/pipx` 422 MB) と `/root/.ansible` 削除 — setup-dev-env.sh は sed パッチで pip install に切替えているが、pipx venv 本体は別経路で残存するため明示削除 +- `/var/log/*` 削除 +- **`--no-nvidia` 固定化**: CUDA 変種の publish を廃止 + - torch cu121 は `nvidia-cu12` pip パッケージから全ての CUDA .so を解決するため、`/usr/local/cuda-11.6` (3.9 GB) は pytorch 動作には不要 + - Autoware C++ の TensorRT/CUDA ノードは動作しなくなるが、本プロジェクトでは pytorch 動作のみ保証すれば十分 + - `build.sh` から `--no-nvidia` オプションと `-cuda` サフィックスタグを削除、`Dockerfile` は `setup-dev-env.sh --no-nvidia` 固定 + - `update-docker-manifest.yaml` から `latest-cuda` / `latest-prebuilt-cuda` エイリアス生成ジョブを削除 ### 4. `.dockerignore` diff --git a/docker/slim.sh b/docker/slim.sh new file mode 100755 index 00000000000..6b1cef85d82 --- /dev/null +++ b/docker/slim.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash +# docker/slim.sh — post-process image slimmer via export/import flatten. +# +# Why flatten: apt-get purge / rm in a derived stage creates union FS whiteouts +# but does NOT reclaim bytes from lower layers. `docker export | docker import` +# writes the current filesystem state into a single new layer, physically +# dropping deleted content. Metadata (CMD/ENV/WORKDIR/etc.) is preserved by +# reading it from the source image and passing --change on import. +# +# Usage: +# ./docker/slim.sh [--mode buildable|ml-only] [] [] +# --mode buildable (default): colcon build still works afterwards +# --mode ml-only: aggressive — strips C/C++ toolchain, ROS headers, +# dev libs. Only python + torch + rclpy runtime survives. 
+# default source = ghcr.io/automotiveaichallenge/autoware-universe:humble-latest-runtime +# default output = - + +set -euo pipefail + +MODE="buildable" +args=() +while [ $# -gt 0 ]; do + case "$1" in + --mode) MODE="$2"; shift 2 ;; + *) args+=("$1"); shift ;; + esac +done +SRC="${args[0]:-ghcr.io/automotiveaichallenge/autoware-universe:humble-latest-runtime}" +DST="${args[1]:-${SRC}-${MODE}}" +[[ "$MODE" =~ ^(buildable|ml-only)$ ]] || { echo "invalid --mode: $MODE"; exit 2; } + +echo "==> Source: $SRC" +echo "==> Output: $DST" + +# Metadata to preserve across flatten. +mapfile -t CHANGES < <( + docker inspect --format ' +{{- range .Config.Env }}ENV {{ . }} +{{ end -}} +{{- range $k, $v := .Config.Labels }}LABEL {{ $k }}={{ $v }} +{{ end -}} +WORKDIR {{ .Config.WorkingDir }} +USER {{ .Config.User }} +ENTRYPOINT {{ json .Config.Entrypoint }} +CMD {{ json .Config.Cmd }} +{{ range $p, $_ := .Config.ExposedPorts }}EXPOSE {{ $p }} +{{ end }}' "$SRC" | sed '/^WORKDIR $/d; /^USER $/d; /^ENTRYPOINT null$/d; /^CMD null$/d; /^$/d' +) + +CID=$(docker create --entrypoint sleep "$SRC" infinity) +trap 'docker rm -f "$CID" >/dev/null 2>&1 || true' EXIT + +echo "==> Running cleanup inside container…" +docker start "$CID" >/dev/null + +# Cleanup list. "buildable" keeps C/C++ toolchain + ROS headers so downstream +# `colcon build` still works. "ml-only" rips out the toolchain for ML-only +# use where colcon is never invoked afterwards. +docker exec -e MODE="$MODE" "$CID" bash -c ' +set -eux + +# 1) Safe apt purges — things colcon build never needs. No wildcard globs +# (they cascade via --auto-remove and break python / ros packages). 
+apt-get update -y || true +apt-mark manual \ + python3 python3-minimal libpython3.10 \ + ros-humble-rclpy ros-humble-ros-core ros-humble-ros-base \ + ros-humble-ament-package python3-ament-package \ + 2>/dev/null || true +DEBIAN_FRONTEND=noninteractive apt-get purge -y \ + openjdk-17-jre-headless openjdk-17-jdk-headless \ + default-jre default-jre-headless \ + || true + +if [ "$MODE" = "ml-only" ]; then + # Aggressive: strip C/C++ toolchain and dev libs. ROS Python bindings still + # work (they only need the .so libs already installed). colcon build fails + # after this — do not use this variant for Autoware-building workflows. + DEBIAN_FRONTEND=noninteractive apt-get purge -y \ + gcc-11 g++-11 cpp-11 binutils \ + cmake cmake-data \ + libboost1.74-dev libgdal-dev libopenblas-dev libcgal-dev \ + libllvm11 libllvm14 libllvm15 \ + libclang-cpp14 libclang1-14 \ + linux-libc-dev \ + || true +fi + +apt-get autoremove -y --purge || true +apt-get clean +rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* + +# 2) JVM data directories (Autoware runtime never invokes Java). +# NOTE: /usr/lib/llvm-* is intentionally preserved — Mesa swrast/llvmpipe +# links libLLVM.so, so removing it breaks OpenGL software rendering on +# CPU-only hosts (rviz2 falls back to llvmpipe when NVIDIA is absent). +rm -rf /usr/lib/jvm /usr/share/java 2>/dev/null || true + +# 3) Intentionally DO NOT sweep .a / .la under /opt/ros/humble. +# rviz_ogre_vendor exports OgreGLSupport.a etc. via CMake targets — removing +# them breaks downstream find_package(rviz_ogre_vendor). Total <10 MB. + +if [ "$MODE" = "ml-only" ]; then + # Purge headers + remaining static libs system-wide. Breaks colcon build. + rm -rf /usr/include /usr/local/include /opt/ros/humble/include 2>/dev/null || true + find /usr -xdev -type f \( -name "*.a" -o -name "*.la" \) -delete 2>/dev/null || true +fi + +# 4) __pycache__ everywhere. 
+find / -xdev -type d -name "__pycache__" -prune -exec rm -rf {} + 2>/dev/null || true + +# 5) Non-English locales. +shopt -s extglob +rm -rf /usr/share/locale/!(en|en_US|C) 2>/dev/null || true +shopt -u extglob +rm -rf /tmp/* /root/.cache /var/tmp/* 2>/dev/null || true + +echo "=== remaining top-level sizes (mode=$MODE) ===" +du -sh /usr/* /opt/* /autoware/* /root/* 2>/dev/null | sort -rh | head -15 +' || { echo "cleanup failed"; exit 1; } + +docker stop "$CID" >/dev/null + +echo "==> Exporting + importing (flatten)…" +change_args=() +for c in "${CHANGES[@]}"; do + change_args+=(--change "$c") +done + +docker export "$CID" | docker import "${change_args[@]}" - "$DST" + +SRC_SIZE=$(docker image inspect "$SRC" --format '{{.Size}}') +DST_SIZE=$(docker image inspect "$DST" --format '{{.Size}}') +printf '\n==> Size: %s (src) -> %s (dst, -%s)\n' \ + "$(numfmt --to=iec "$SRC_SIZE")" \ + "$(numfmt --to=iec "$DST_SIZE")" \ + "$(numfmt --to=iec "$((SRC_SIZE - DST_SIZE))")" + +echo "==> Smoke test: torch + rclpy + colcon/gcc availability" +docker run --rm --entrypoint bash "$DST" -c ' + source /opt/ros/humble/setup.bash + [ -f /autoware/install/setup.bash ] && source /autoware/install/setup.bash + python3 -c "import torch; print(\"torch:\", torch.__version__)" + python3 -c "import rclpy; rclpy.init(); print(\"rclpy OK\")" + which gcc-11 g++-11 cmake colcon + test -d /opt/ros/humble/include && echo "ros headers OK" + test -d /usr/include/c++ && echo "c++ headers OK" +' || echo "⚠ smoke test failed — inspect before using" diff --git a/docker/test_ml_workspace.sh b/docker/test_ml_workspace.sh new file mode 100755 index 00000000000..863cb9a8f98 --- /dev/null +++ b/docker/test_ml_workspace.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Test whether a base image supports aichallenge-racingkart's ML training +# workflow (torch GPU + tiny_lidar_net model construction + training step). 
+# +# Runs entirely via `docker run` (no racingkart build needed) and uses +# synthetic data so no rosbag/dataset is required. + +set -euo pipefail + +IMG="${1:-ghcr.io/automotiveaichallenge/autoware-universe:humble-latest}" +RACINGKART="${RACINGKART_DIR:-$HOME/aichallenge-racingkart}" +ML_WS="$RACINGKART/aichallenge/ml_workspace" + +[ -d "$ML_WS/tiny_lidar_net" ] || { echo "ml_workspace not found at $ML_WS"; exit 1; } +[ -f /tmp/ml_smoke.py ] || { echo "/tmp/ml_smoke.py missing"; exit 1; } + +echo "==> Image: $IMG" +docker image inspect "$IMG" --format 'size: {{.Size}} bytes' | numfmt --to=iec --field=2 -- || true + +docker run --rm --gpus all \ + -v "$ML_WS:/aichallenge/ml_workspace:ro" \ + -v /tmp/ml_smoke.py:/tmp/ml_smoke.py:ro \ + --entrypoint bash \ + "$IMG" -c ' + set -e + echo "=== pip install extras ===" + python3 -m pip install --quiet --no-cache-dir \ + hydra-core omegaconf tensorboard h5py jaxtyping tqdm 2>&1 | tail -5 + echo "=== smoke run ===" + python3 /tmp/ml_smoke.py + ' diff --git a/packages.txt b/packages.txt index aed4ae1f937..b4329644d19 100644 --- a/packages.txt +++ b/packages.txt @@ -3,6 +3,17 @@ ros-humble-domain-bridge ros-humble-rosbag2-storage-mcap ros-humble-rqt-graph ros-humble-rqt-tf-tree +ros-humble-xacro +ros-humble-topic-tools +ros-humble-nav2-msgs + +# rviz2 + dependencies (Autoware rviz plugins rely on these transitively) +ros-humble-rviz2 +ros-humble-rviz-common +ros-humble-rviz-default-plugins +ros-humble-rviz-rendering +ros-humble-rviz-ogre-vendor +ros-humble-rviz-assimp-vendor # Desktop / diagnostics arp-scan @@ -14,3 +25,17 @@ zstd # Graphics libs libgl1-mesa-dri libgl1-mesa-glx + +# Qt5 dev (for downstream rviz plugin colcon builds) +qtbase5-dev +qttools5-dev + +# GeographicLib (used by gnss_poser variants in downstream workspaces) +libgeographic-dev +geographiclib-tools + +# Boost dev headers (racingkart packages declare libboost-dev) +libboost-dev + +# Python plotly for analytics scripts referenced by racingkart 
+python3-plotly From b724e023dd2e45fb9244b257b1a10bc6f3b39a06 Mon Sep 17 00:00:00 2001 From: taikitanaka3 Date: Fri, 24 Apr 2026 18:28:25 +0900 Subject: [PATCH 3/4] fix(docker): resolve hadolint SC2015 and DL3042 in autoware-universe Dockerfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Wrap `pip uninstall ansible … || true` in braces so the fallback does not mask failures of the preceding `setup-dev-env.sh` run (SC2015). - Add `# hadolint ignore=DL3042` above the devel-stage pip install since the build intentionally relies on a BuildKit cache mount rather than `--no-cache-dir`. --- docker/autoware-universe/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/autoware-universe/Dockerfile b/docker/autoware-universe/Dockerfile index 50e80011e40..b4813424933 100644 --- a/docker/autoware-universe/Dockerfile +++ b/docker/autoware-universe/Dockerfile @@ -37,7 +37,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ sed -i 's|^pipx install --include-deps --force "ansible==6\.\*"$|python3 -m pip install "ansible==6.*"|' setup-dev-env.sh \ && ./setup-dev-env.sh -y --runtime --no-nvidia universe \ - && python3 -m pip uninstall -y ansible ansible-core || true + && { python3 -m pip uninstall -y ansible ansible-core || true; } # Extra apt packages. These are shared by devel and runtime. # Heavy pip packages go into the devel stage only (see below) so that @@ -70,6 +70,7 @@ FROM base AS devel # Heavy pip packages (torch, nvidia, ultralytics, plotly, pandas, ...) are # installed here so they ship with devel/prebuilt but NOT with runtime. 
COPY requirements.txt /autoware/ +# hadolint ignore=DL3042 RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install -r requirements.txt From ab61e56cac9b6e0f5541d5fc746dcff81f9283d0 Mon Sep 17 00:00:00 2001 From: taikitanaka3 Date: Fri, 24 Apr 2026 18:43:23 +0900 Subject: [PATCH 4/4] chore: update --- .github/workflows/update-docker-manifest.yaml | 1 - CLAUDE.md | 59 +++++++++ docker/PR_DESCRIPTION.md | 117 ++++++++++++++++++ docker/reduce.md | 28 +++-- docker/slim.sh | 34 +++-- docker/test_ml_workspace.sh | 18 ++- 6 files changed, 226 insertions(+), 31 deletions(-) create mode 100644 CLAUDE.md create mode 100644 docker/PR_DESCRIPTION.md diff --git a/.github/workflows/update-docker-manifest.yaml b/.github/workflows/update-docker-manifest.yaml index 857acfbedb1..5750b0f0e03 100644 --- a/.github/workflows/update-docker-manifest.yaml +++ b/.github/workflows/update-docker-manifest.yaml @@ -34,4 +34,3 @@ jobs: package-name: autoware-universe rosdistro: ${{ needs.load-env.outputs.rosdistro }} tag-name: latest-prebuilt - diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000000..a9e17128c04 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,59 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Repository purpose + +This is a fork of the Autoware meta-repository customized for the Automotive AI Challenge (AIC) with AWSIM. It is a **meta-repo**: workspace sources are pulled in via `autoware.repos` / `simulator.repos` into `src/` by `vcs import`, not committed here. Published Docker images live at `ghcr.io/automotiveaichallenge/autoware-universe`. 
+ +## Common commands + +Host setup (one-time): +```bash +./setup-dev-env.sh # full dev env via ansible +./setup-dev-env.sh -y --runtime universe # runtime-only (used inside Docker) +``` + +Source import + build (standard Autoware workspace flow; run from repo root): +```bash +mkdir -p src && vcs import src < autoware.repos +rosdep update && rosdep install -y --from-paths src --ignore-src --rosdistro humble +source /opt/ros/humble/setup.bash +colcon build --cmake-args -DCMAKE_BUILD_TYPE=Release +colcon test --packages-select && colcon test-result --verbose +``` + +Docker builds (see `docker/build.sh`): +```bash +./docker/build.sh # builds devel/prebuilt/runtime (always --no-nvidia) +./docker/build.sh --platform linux/arm64 +./docker/build.sh --clean-cache # force full rebuild (default reuses BuildKit cache) +``` + +`build.sh` は Dockerfile ビルド後に自動で `docker/slim.sh --mode buildable` を実行し、`:humble-latest-runtime` / `:humble-latest` を 6.5 GB まで絞り込む。colcon build 可能性は維持。 + +torch cu121 は bundled `nvidia-*` pip パッケージ経由で GPU 推論可能。`/usr/local/cuda` を要する Autoware C++ TensorRT ノードはサポート外。 + +`pre-commit` is the lint gate (see `.github/workflows/pre-commit*.yaml`); run `pre-commit run -a` locally. + +## Docker architecture (critical) + +`docker/autoware-universe/Dockerfile` is a 4-stage multi-stage build: `base` → `devel` → `prebuilt`, and `runtime` branches **directly from `base`** (not `devel`). See `docker/reduce.md` for the full rationale. **Invariants that must not be broken:** + +1. **`runtime` derives from `base`, never from `devel`/`prebuilt`.** Docker union FS cannot physically delete lower-layer content with `rm -rf`; branching from `devel` drags in the `src/` layer (~3.3 GB) and heavy pip deps (~6 GB) permanently. +2. **Heavy pip/apt/COPY belong in `devel` only.** Anything added in `base` propagates to `runtime`. `requirements.txt` (torch, nvidia, ultralytics, …) is installed in `devel`. The `runtime` stage installs only `torch==2.3.1` + cu121 for GPU inference. +3. 
**Cleanup must happen in the same `RUN` as the layer it cleans.** A later `RUN rm -rf …` does not shrink earlier layers. +4. **`runtime` copies only `/autoware/install/` from `prebuilt`**, then strips binaries, deletes headers/`*.a`/`*.la`, large `*.onnx` (>10 MB), `__pycache__`, docs/man/locale/icons/fonts, `/usr/lib/{gcc,jvm,llvm*}`. +5. **ansible setuptools patch**: the Dockerfile `sed`-patches `setup-dev-env.sh` to replace `pipx install "ansible==6.*"` with `python3 -m pip install` — the pipx venv lacks setuptools, which breaks `ansible.builtin.pip` (imports `pkg_resources`). Because the universe playbook uses `connection: local`, `ANSIBLE_PYTHON_INTERPRETER` cannot override this. Do not revert the sed patch. +6. **BuildKit cache mounts** (`/var/cache/apt`, `/var/lib/apt/lists`, `/root/.cache/pip`) keep apt/pip downloads out of final layers while enabling incremental rebuilds. `docker-clean` is removed and `Keep-Downloaded-Packages "true"` is set so the cache mount actually persists. `Install-Recommends "false"` is **not** set globally (breaks ansible); `--no-install-recommends` is applied only on explicit `apt-get install` calls. +7. `.dockerignore` at repo root excludes `build/`, `install/`, `log/`, `src/`, `.git/` — do not add them back; context transfer would balloon to ~5 GB. + +Tags published by `build.sh`: `:$rosdistro-latest-{devel,prebuilt,runtime}[-cuda]` on `ghcr.io/automotiveaichallenge/autoware-universe`. + +## GPU vs CPU runtime + +The default build produces a `runtime` image that can run torch on GPU when started with `--gpus all` (cu121 userspace libs are bundled; host supplies the driver). Autoware C++ nodes themselves don't import torch/ultralytics under `--no-nvidia`, which is why pip deps were safely moved out of `base`. To re-enable full GPU Autoware (TensorRT YOLO etc.), reverse the steps listed in `docker/reduce.md` §"運用上の注意". 
+ +## Env / distro + +`amd64.env` / `arm64.env` pin `rosdistro=humble`, `rmw_implementation=rmw_cyclonedds_cpp`, and base images. `build.sh` sources the matching file based on target platform. diff --git a/docker/PR_DESCRIPTION.md b/docker/PR_DESCRIPTION.md new file mode 100644 index 00000000000..7aa3593a1be --- /dev/null +++ b/docker/PR_DESCRIPTION.md @@ -0,0 +1,117 @@ +# Reduce Docker image size while preserving colcon + ML training workflows + +## Summary + +`ghcr.io/automotiveaichallenge/autoware-universe:humble-latest` を **13.8 GB → 7.56 GB(−45%)** に削減。`aichallenge-racingkart` 下流の `colcon build` / AWSIM 起動 / ml_workspace の PyTorch 学習が全て動作することを実機で確認済。 + +## Motivation + +- `humble-latest` (旧 13.8 GB) は `/usr/local/cuda-11.6` (~3.9 GB、torch は pip 経由 `nvidia-*` で自己完結しているため未使用)、pipx ansible venv (~422 MB)、`/usr/share/doc` (~160 MB)、および Docker union FS の下層レイヤーで whiteout'd されたが物理削除されていないデータを多量に含んでいた。 +- 下流の `aichallenge-racingkart` は ML 学習 (pytorch) + Autoware ノードビルド + AWSIM シミュレータを同じベースイメージの上で走らせる。軽量化に伴い必要な apt 依存が暗黙に切れていたため、packages.txt を拡充して build 可用性を保証。 + +## Changes + +### `docker/autoware-universe/Dockerfile` +- **CUDA toolkit を強制削除**: `setup-dev-env.sh --no-nvidia` をビルドスクリプトで固定し、base stage の同一 RUN で `rm -rf /usr/local/cuda*` を実行(union FS レイヤー原則により、同一 RUN でないと物理削除されない)。 +- **runtime cleanup の保守的化**: `/usr/lib/gcc`(cc1 等を含む)、`/usr/include`(libstdc++ の `bits/` を含む)、`/opt/ros/humble/include`、`/autoware/install/*/include`、全 `.a` ファイルを保持。これらは下流 `colcon build` が参照するため。 +- **pipx ansible venv の正しいパスを cleanup**: 既存の `/root/.local/pipx` はパス誤りで 422 MB 残っていた → `/root/.local/share/pipx` を追加。 +- **追加 cleanup**: `/root/.ansible`, `/usr/share/doc-base`, `/usr/share/info`。 +- `ARG SETUP_ARGS` 廃止(常に `--no-nvidia`)。 + +### `docker/build.sh` +- `--no-nvidia` オプションと `-cuda` サフィックス付きタグ生成を廃止(torch cu121 は pip 同梱 `nvidia-*` で完全自己完結するため CUDA 変種は存在意義がない)。 +- Dockerfile ビルド直後に `docker/slim.sh --mode buildable` を自動実行。 +- `:humble-latest` を `:humble-latest-runtime` 
のエイリアスとして付与(racingkart 等の下流互換性のため)。 +- BuildKit の `--allow=ssh` 明示対応。 + +### `docker/slim.sh` (新規) +`docker export | docker import` による flatten で、Docker union FS では物理削除できないサイズを回収する後処理スクリプト。 +- `--mode buildable` (default): colcon build 可用性を維持。`openjdk-*`, `/usr/lib/jvm`, `__pycache__`, 非英語 locale のみ削除。`/usr/lib/llvm-*` は Mesa の swrast/llvmpipe が `libLLVM.so` に動的リンクしているため保持(CPU-only インスタンスで rviz2 をソフトウェアレンダリング起動する際に必要)。 +- `--mode ml-only`: さらに C/C++ toolchain とヘッダーも削除(ML 学習専用、rclpy は削除される)。 +- 主要な cascading 事故を防ぐため `apt-mark manual` で `python3 / rclpy / ros-humble-ros-core` 等を保護。 + +### `docker/test_ml_workspace.sh` (新規) +ml_workspace の tiny_lidar_net パイプライン相当(torch GPU + TinyLidarNet 構築 + forward/backward/optim 5 step)を実データなしで回す smoke test。 + +### `packages.txt` (拡充) +`--no-nvidia` 化に伴い暗黙に欠落していた apt パッケージを明示追加: +- ROS runtime/tooling: `ros-humble-xacro`, `ros-humble-topic-tools`, `ros-humble-nav2-msgs` +- rviz2 系: `ros-humble-rviz2` + `rviz-common` / `rviz-default-plugins` / `rviz-rendering` / `rviz-ogre-vendor` / `rviz-assimp-vendor` (`autoware_overlay_rviz_plugin` が `ament_auto_find_build_dependencies` 経由で rviz_common 側の `find_dependency(Qt5)` に依存して `qt5_wrap_cpp` を取得する、暗黙の推移的連鎖を成立させるため) +- Qt5 dev: `qtbase5-dev`, `qttools5-dev` +- 地理測地: `libgeographic-dev`, `geographiclib-tools` +- その他: `libboost-dev`, `python3-plotly` + +### `.github/workflows/update-docker-manifest.yaml` +`latest-cuda` / `latest-prebuilt-cuda` エイリアスジョブ削除。 + +### `docker/reduce.md` / `CLAUDE.md` +運用注意と不変条件を更新。 + +## Image size comparison + +| イメージ | Before | After | 削減 | +| --- | --- | --- | --- | +| **`ghcr.io/.../autoware-universe:humble-latest`** | **13.8 GB** | **7.56 GB** | **−6.24 GB (−45%)** | +| `humble-latest-runtime` (= `humble-latest`) | 13.8 GB | 7.56 GB | −45% | +| `humble-latest-devel` | 13.8 GB | ~12.0 GB | −13% | +| `humble-latest-prebuilt` | — | 16.6 GB | (新規タグ) | +| aichallenge-racingkart `aichallenge-2025-dev` (下流) | 旧 13.8GB ベース | 8.91 GB | — | + +## Verified items + +### 
Upstream (awsim-autoware)
+- [x] `./docker/build.sh` が成功 (`humble-latest-runtime` = 7.56 GB)
+- [x] `slim.sh --mode buildable` が自動実行され `.a` と C/C++ toolchain を保持
+- [x] `ARG SETUP_ARGS` 廃止後も CI `docker-build-and-push-main.yaml` が動く (matrix の `setup-args` は以後 no-op)
+- [x] torch 2.3.1+cu121 が `import torch; torch.cuda.is_available()` で `True`(RTX 2080 Ti 実機確認)
+- [x] gcc-11 / g++-11 / cc1 / Scrt1.o / crti.o が揃っており `echo 'int main(){}' | gcc -xc -` がリンクまで通る
+- [x] `#include <rclcpp/rclcpp.hpp>` が `/opt/ros/humble/include/rclcpp` から解決
+- [x] `/autoware/install/autoware_auto_control_msgs/include` 等の Autoware パッケージヘッダーが保持
+- [x] `/usr/local/cuda*` が存在しない(torch の ldd で `libcudart.so.12` が `/usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib/` から解決されることを確認)
+- [x] `/root/.local/share/pipx` が削除済(422 MB 回収)
+- [x] `/usr/share/doc` / `doc-base` / `info` / 非英語 locale 削除済
+
+### Downstream (aichallenge-racingkart)
+- [x] `./docker_build.sh dev` 成功 → `aichallenge-2025-dev:latest` (8.91 GB) ビルド
+- [x] `make autoware-build` で `colcon build` が 22/22 packages 成功(エラーゼロ、stderr 出力は ament の "header install destination" 警告のみ)
+- [x] `make dev` で AWSIM + Autoware の 2 コンテナ起動、20 秒以上連続稼働
+- [x] `ros2 node list` で Autoware ノード群が登録済(ekf_localizer, gyro_odometer, mpc_controller, racing_kart_gnss_poser, rviz2 等)
+- [x] `ros2 topic list` で AWSIM 連携 topic (`/awsim/control_cmd`, `/awsim/state` 等) と Autoware 制御 topic (`/control/command/control_cmd` 等) が publish されている
+- [x] `make down` でクリーンシャットダウン
+
+### ML training (ml_workspace/tiny_lidar_net) — GPU 実機
+- [x] `python3 train.py` が Hydra config を正しく読み込み
+- [x] `MultiSeqConcatDataset` で複数シーケンスを ConcatDataset 化(2 train seq + 1 val seq, 1000/200 samples)
+- [x] CUDA device (RTX 2080 Ti) 認識、`.to(device)` 成功
+- [x] Train/Val ループ 3 epochs 完走(15 iter/epoch × 3 + 4 val iter)
+- [x] Loss 0.7513 → 0.6118 へ単調減少(学習が実際に進んでいる)
+- [x] `best_model.pth` / `last_model.pth` 保存成功(`/tmp/ckpts/`)
+- [x] `convert_weight.py --model tinylidarnet --ckpt best_model.pth` が 
`weights/converted_weights.npy` を出力(deploy 用の .pth→.npy 変換) +- [x] `hydra-core`, `omegaconf`, `tensorboard`, `h5py`, `hdf5plugin`, `jaxtyping`, `tqdm`, `rosbags` の import がすべて通る + +### 3 variant smoke test (GPU 学習 forward/backward) — 参考 +| Variant | Size | colcon build | rclpy | ML 学習 (GPU) | +| --- | --- | --- | --- | --- | +| A: Dockerfile のみ | 8.99 GB | ✅ | ✅ | ✅ | +| B: slim.sh `--mode buildable` (本 PR 採用) | 6.5-7.6 GB | ✅ | ✅ | ✅ | +| C: slim.sh `--mode ml-only` | 5.9 GB | ❌ | ❌ | ✅ | + +## Test plan + +- [x] `./docker/build.sh --clean-cache` (フレッシュビルド) で 7.56 GB の runtime image が生成される +- [x] `aichallenge-racingkart` で `./docker_build.sh dev && make autoware-build && make dev` がエラーなく完走 +- [x] `docker run --gpus all aichallenge-2025-dev:latest python3 /aichallenge/ml_workspace/tiny_lidar_net/train.py ...` で実学習が回る +- [x] `ros2 node list` / `ros2 topic list` で Autoware + AWSIM の通信を確認 +- [ ] GHCR に push して外部 CI / 参加者が新サイズの `humble-latest` を pull できること(別 PR で実施予定、権限調整待ち) + +## Known caveats + +1. **Autoware C++ の TensorRT/CUDA ノードはサポート外**: `--no-nvidia` 固定のため、tensorrt_yolo / lidar_centerpoint 等の CUDA ベースノードは実行不可。pytorch は pip 同梱 `nvidia-*` で動作する。必要になった場合は `docker/reduce.md` の巻き戻し手順を参照。 +2. **slim.sh は下流が `apt install` を再実行しても動くよう `/var/lib/apt/lists` を再取得可能な状態で保持**: ただし `apt-mark manual` による保護リストに無い `ros-humble-*` を purge する際はカスケードに注意。 +3. 
**`.a` / headers は意図的に保持**: `rviz_ogre_vendor` が `libOgreGLSupport.a` を `IMPORTED` target として export する CMake 設定があるため。削除すると下流 CMake が "file does not exist" で fail する(実機で再現確認済)。 + +## References + +- `docker/reduce.md` — 本作業の経緯と Docker union FS 原則の詳説 +- `docker/slim.sh` — flatten 方式の後処理スクリプト +- `docker/test_ml_workspace.sh` — ml_workspace 用 smoke test diff --git a/docker/reduce.md b/docker/reduce.md index c5f1819460c..91254716314 100644 --- a/docker/reduce.md +++ b/docker/reduce.md @@ -2,18 +2,20 @@ ## サイズ削減結果 -| イメージ | Before | After (Dockerfile) | After (+ slim.sh) | 最終削減 | -| --- | --- | --- | --- | --- | -| **runtime** (= `:humble-latest`) | **13.8 GB** | **8.99 GB** | **6.5 GB** | **−7.3 GB (−53%)** | -| devel | 13.8 GB | 12.1 GB | — | −1.7 GB (−12%) | +| イメージ | Before | After (Dockerfile) | After (+ slim.sh) | 最終削減 | +| -------------------------------- | ----------- | ------------------ | ----------------- | ------------------ | +| **runtime** (= `:humble-latest`) | **13.8 GB** | **8.99 GB** | **6.5 GB** | **−7.3 GB (−53%)** | +| devel | 13.8 GB | 12.1 GB | — | −1.7 GB (−12%) | `build.sh` は Dockerfile ビルド後に自動で `slim.sh --mode buildable` を実行し、最終 `:humble-latest-runtime` / `:humble-latest` を生成する。 ## slim.sh の mode + - **`--mode buildable`** (default, デフォルト採用): colcon build 可能性を維持。gcc-11, g++-11, cmake, /usr/include, /opt/ros/humble/include, libboost*-dev, libgdal-dev, libopenblas-dev を保持。openjdk / JVM / `__pycache__` / 非英語 locale を削除。`/usr/lib/llvm-*` は CPU ホストでの Mesa swrast / rviz2 ソフトウェアレンダリングに必要なため保持 → **6.5-7.6 GB** - **`--mode ml-only`**: ML 学習専用。上記に加えて C/C++ toolchain と全ヘッダーを削除。rclpy もカスケードで消える(ROS 実行不可)。ML 学習コードは `rosbags` pip パッケージ経由で bag 読込するため影響なし → **5.9 GB** ## 動作検証 + 各 variant で `docker/test_ml_workspace.sh` により ML 学習 smoke test (torch GPU, TinyLidarNet モデル構築, 5-step 学習ループ) が PASS。 > runtime には torch (cu121) を含めて GPU 推論を可能にしている。torch と同梱 CUDA ライブラリを外せば 3.81 GB まで落とせる。 @@ -79,15 +81,15 @@ ## ビルド時間 (フレッシュビルド、キャッシュ無し) -| ステップ | 所要時間 | 
-|---------|---------| -| setup-dev-env.sh | ~150 s | -| apt (packages.txt) | ~10 s | -| pip install (devel 内) | ~90 s | -| vcs + rosdep install | ~90 s | -| colcon build | ~10 分 | -| runtime strip + cleanup | ~5 s | -| **合計** | **約 20 分** | +| ステップ | 所要時間 | +| ----------------------- | ------------ | +| setup-dev-env.sh | ~150 s | +| apt (packages.txt) | ~10 s | +| pip install (devel 内) | ~90 s | +| vcs + rosdep install | ~90 s | +| colcon build | ~10 分 | +| runtime strip + cleanup | ~5 s | +| **合計** | **約 20 分** | 再ビルド時は apt/pip キャッシュマウントが効くため、これらのダウンロード分が省略される。 diff --git a/docker/slim.sh b/docker/slim.sh index 6b1cef85d82..a62e4f0ca71 100755 --- a/docker/slim.sh +++ b/docker/slim.sh @@ -20,21 +20,30 @@ set -euo pipefail MODE="buildable" args=() while [ $# -gt 0 ]; do - case "$1" in - --mode) MODE="$2"; shift 2 ;; - *) args+=("$1"); shift ;; - esac + case "$1" in + --mode) + MODE="$2" + shift 2 + ;; + *) + args+=("$1") + shift + ;; + esac done SRC="${args[0]:-ghcr.io/automotiveaichallenge/autoware-universe:humble-latest-runtime}" DST="${args[1]:-${SRC}-${MODE}}" -[[ "$MODE" =~ ^(buildable|ml-only)$ ]] || { echo "invalid --mode: $MODE"; exit 2; } +[[ $MODE =~ ^(buildable|ml-only)$ ]] || { + echo "invalid --mode: $MODE" + exit 2 +} echo "==> Source: $SRC" echo "==> Output: $DST" # Metadata to preserve across flatten. mapfile -t CHANGES < <( - docker inspect --format ' + docker inspect --format ' {{- range .Config.Env }}ENV {{ . 
}} {{ end -}} {{- range $k, $v := .Config.Labels }}LABEL {{ $k }}={{ $v }} @@ -117,14 +126,17 @@ rm -rf /tmp/* /root/.cache /var/tmp/* 2>/dev/null || true echo "=== remaining top-level sizes (mode=$MODE) ===" du -sh /usr/* /opt/* /autoware/* /root/* 2>/dev/null | sort -rh | head -15 -' || { echo "cleanup failed"; exit 1; } +' || { + echo "cleanup failed" + exit 1 +} docker stop "$CID" >/dev/null echo "==> Exporting + importing (flatten)…" change_args=() for c in "${CHANGES[@]}"; do - change_args+=(--change "$c") + change_args+=(--change "$c") done docker export "$CID" | docker import "${change_args[@]}" - "$DST" @@ -132,9 +144,9 @@ docker export "$CID" | docker import "${change_args[@]}" - "$DST" SRC_SIZE=$(docker image inspect "$SRC" --format '{{.Size}}') DST_SIZE=$(docker image inspect "$DST" --format '{{.Size}}') printf '\n==> Size: %s (src) -> %s (dst, -%s)\n' \ - "$(numfmt --to=iec "$SRC_SIZE")" \ - "$(numfmt --to=iec "$DST_SIZE")" \ - "$(numfmt --to=iec "$((SRC_SIZE - DST_SIZE))")" + "$(numfmt --to=iec "$SRC_SIZE")" \ + "$(numfmt --to=iec "$DST_SIZE")" \ + "$(numfmt --to=iec "$((SRC_SIZE - DST_SIZE))")" echo "==> Smoke test: torch + rclpy + colcon/gcc availability" docker run --rm --entrypoint bash "$DST" -c ' diff --git a/docker/test_ml_workspace.sh b/docker/test_ml_workspace.sh index 863cb9a8f98..0191df41a46 100755 --- a/docker/test_ml_workspace.sh +++ b/docker/test_ml_workspace.sh @@ -11,17 +11,23 @@ IMG="${1:-ghcr.io/automotiveaichallenge/autoware-universe:humble-latest}" RACINGKART="${RACINGKART_DIR:-$HOME/aichallenge-racingkart}" ML_WS="$RACINGKART/aichallenge/ml_workspace" -[ -d "$ML_WS/tiny_lidar_net" ] || { echo "ml_workspace not found at $ML_WS"; exit 1; } -[ -f /tmp/ml_smoke.py ] || { echo "/tmp/ml_smoke.py missing"; exit 1; } +[ -d "$ML_WS/tiny_lidar_net" ] || { + echo "ml_workspace not found at $ML_WS" + exit 1 +} +[ -f /tmp/ml_smoke.py ] || { + echo "/tmp/ml_smoke.py missing" + exit 1 +} echo "==> Image: $IMG" docker image inspect "$IMG" 
--format 'size: {{.Size}} bytes' | numfmt --to=iec --field=2 -- || true docker run --rm --gpus all \ - -v "$ML_WS:/aichallenge/ml_workspace:ro" \ - -v /tmp/ml_smoke.py:/tmp/ml_smoke.py:ro \ - --entrypoint bash \ - "$IMG" -c ' + -v "$ML_WS:/aichallenge/ml_workspace:ro" \ + -v /tmp/ml_smoke.py:/tmp/ml_smoke.py:ro \ + --entrypoint bash \ + "$IMG" -c ' set -e echo "=== pip install extras ===" python3 -m pip install --quiet --no-cache-dir \