diff --git a/.github/workflows/container-base-images.yml b/.github/workflows/container-base-images.yml
index 51b8c5d03be3..cb2e97edb0fb 100644
--- a/.github/workflows/container-base-images.yml
+++ b/.github/workflows/container-base-images.yml
@@ -22,7 +22,10 @@ jobs:
   build-base-image:
     name: Build Base Container Image
     runs-on: ubuntu-latest
-    timeout-minutes: 45
+    # The guestos-base images build a custom kernel from Ubuntu sources
+    # (see ic-os/guestos/context/Dockerfile.base), which on a 2-vCPU
+    # GitHub-hosted runner can take well over an hour.
+    timeout-minutes: 180
     permissions:
       packages: write
       contents: write
diff --git a/ic-os/guestos/context/Dockerfile.base b/ic-os/guestos/context/Dockerfile.base
index 390641c0d473..0a862d2eb6de 100644
--- a/ic-os/guestos/context/Dockerfile.base
+++ b/ic-os/guestos/context/Dockerfile.base
@@ -35,6 +35,98 @@ RUN cd /tmp/ && \
     echo "c46e5b6f53948477ff3a19d97c58307394a29fe64a01905646f026ddc32cb65b node_exporter-1.10.2.linux-amd64.tar.gz" > node_exporter.sha256 && \
     sha256sum -c node_exporter.sha256
 
+#
+# Kernel build stage:
+# - Fetch Ubuntu's linux-hwe-6.17 source package
+# - Apply local patches from kernel-patches/ (lexicographic order)
+# - Build the "generic" flavor binary .deb packages
+#
+# The resulting .debs are consumed by the final image stage below, replacing
+# the stock kernel that would otherwise be pulled in via apt.
+#
+FROM ubuntu:24.04 AS kernel-build
+
+USER root:root
+
+ENV TZ=UTC
+ENV DEBIAN_FRONTEND=noninteractive
+ENV SOURCE_DATE_EPOCH=0
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# Ubuntu source package to patch and rebuild. The final stage installs the
+# .debs built from it instead of a stock kernel (meta)package from apt.
+ARG _KERNEL_SOURCE_PACKAGE=linux-hwe-6.17
+
+# Enable deb-src for Ubuntu's deb822 sources file and install build deps.
+RUN sed -i 's/^Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/ubuntu.sources && \
+    apt-get -y update && \
+    apt-get -y --no-install-recommends install \
+        ca-certificates \
+        build-essential \
+        fakeroot \
+        devscripts \
+        dpkg-dev \
+        kmod \
+        cpio \
+        rsync \
+        bc \
+        bison \
+        flex \
+        libelf-dev \
+        libssl-dev \
+        libncurses-dev \
+        dwarves \
+        zstd \
+        python3 \
+        python3-debian \
+        patch && \
+    apt-get -y build-dep "${_KERNEL_SOURCE_PACKAGE}"
+
+WORKDIR /build
+RUN apt-get -y source "${_KERNEL_SOURCE_PACKAGE}"
+
+COPY kernel-patches /tmp/kernel-patches
+
+# Apply all *.patch files in lexicographic order. A directory without any
+# *.patch files is tolerated, so dropping a patch only requires deleting it;
+# the directory itself must still exist for the COPY above to succeed.
+#
+# Note: /bin/sh in the base image is dash, which does not support "shopt", so
+# we iterate explicitly and skip the loop when no *.patch files exist.
+RUN set -eux; \
+    srcdir="$(find /build -maxdepth 1 -mindepth 1 -type d -name 'linux-*' | head -n1)"; \
+    test -n "$srcdir"; \
+    cd "$srcdir"; \
+    for p in /tmp/kernel-patches/*.patch; do \
+        [ -e "$p" ] || continue; \
+        echo "Applying $p"; \
+        patch -p1 --forward --no-backup-if-mismatch < "$p"; \
+    done; \
+    EDITOR=true DEBFULLNAME="IC GuestOS build" DEBEMAIL="devnull@dfinity.org" \
+        debchange --local=+dfinity "Apply DFINITY custom kernel patches."
+
+# Build only the amd64 "generic" flavor, skipping debug and retpoline extras
+# to cut build time. Signed image packages are not produced (and not needed
+# for GuestOS, which signs kernels as part of the IC image build).
+# DEB_BUILD_OPTIONS=parallel=N makes debian/rules fan out to $(nproc) jobs.
+RUN set -eux; \
+    srcdir="$(find /build -maxdepth 1 -mindepth 1 -type d -name 'linux-*' | head -n1)"; \
+    cd "${srcdir:?no linux-* source tree found under /build}"; \
+    export DEB_BUILD_OPTIONS="parallel=$(nproc)"; \
+    fakeroot debian/rules clean; \
+    fakeroot debian/rules binary-generic skipdbg=true skipretpoline=true
+
+# Collect only the .debs needed to boot ("[0-9]*" excludes linux-modules-extra):
+#   linux-image-unsigned-<version>-generic (vmlinuz + core modules)
+#   linux-modules-<version>-generic
+#   linux-modules-extra-<version>-generic
+RUN set -eux; \
+    mkdir /debs; \
+    cp /build/linux-image-unsigned-*-generic_*_amd64.deb /debs/; \
+    cp /build/linux-modules-[0-9]*-generic_*_amd64.deb /debs/; \
+    cp /build/linux-modules-extra-*-generic_*_amd64.deb /debs/; \
+    ls -la /debs
+
 #
 # Second build stage:
 # - Download and cache minimal Ubuntu Server 24.04 LTS Docker image
@@ -53,18 +145,21 @@ ENV TZ=UTC
 # For the dev image, use both "packages.common" and "packages.dev" -- this can
 # be set via docker build args (see above).
 ARG PACKAGE_FILES=packages.common
-# The kernel is installed here to keep the extra modules in sync.
-# Unfortunately, there is no metapackage to track the extra modules that does
-# not also include firmware.
-ARG _KERNEL_PACKAGE=linux-image-virtual-hwe-24.04
+# The kernel is installed from locally built .deb packages produced by the
+# kernel-build stage above, so that we can carry patches on top of Ubuntu's
+# linux-hwe-6.17. The linux-image, linux-modules and linux-modules-extra
+# packages are installed together to keep the extra modules in sync with the
+# kernel ABI.
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY packages.* /tmp/ +COPY --from=kernel-build /debs /tmp/kernel-debs RUN apt-get -y update && \ apt-get -y upgrade && \ apt-get -y --no-install-recommends install $(for P in ${PACKAGE_FILES}; do cat /tmp/$P | sed -e "s/#.*//" ; done) \ - ${_KERNEL_PACKAGE} \ - linux-modules-extra-$(apt-cache depends ${_KERNEL_PACKAGE} | sed -n -e 's/ Depends: linux-image-\(.*\)-generic/\1/p')-generic && \ - rm /tmp/packages.* + /tmp/kernel-debs/linux-image-unsigned-*-generic_*_amd64.deb \ + /tmp/kernel-debs/linux-modules-*-generic_*_amd64.deb \ + /tmp/kernel-debs/linux-modules-extra-*-generic_*_amd64.deb && \ + rm -rf /tmp/packages.* /tmp/kernel-debs # Install node_exporter COPY --from=download /tmp/node_exporter-1.10.2.linux-amd64.tar.gz /tmp/node_exporter-1.10.2.linux-amd64.tar.gz diff --git a/ic-os/guestos/context/kernel-patches/0001-mm-huge_memory-fix-folio_split-race-condition.patch b/ic-os/guestos/context/kernel-patches/0001-mm-huge_memory-fix-folio_split-race-condition.patch new file mode 100644 index 000000000000..091b905c0555 --- /dev/null +++ b/ic-os/guestos/context/kernel-patches/0001-mm-huge_memory-fix-folio_split-race-condition.patch @@ -0,0 +1,58 @@ +From 03b75f017ffe6cf556fefbd44f44655bf4a9af48 Mon Sep 17 00:00:00 2001 +From: Zi Yan +Date: Fri, 27 Feb 2026 14:11:36 -0500 +Subject: [PATCH] mm/huge_memory: fix folio_split() race condition with + folio_try_get() + +During a pagecache folio split, the values in the related xarray should not +be changed from the original folio at xarray split time until all +after-split folios are ready and stored in the xarray. Otherwise, a +parallel folio_try_get() can see stale values in the xarray and a stale +value can be a unfrozen after-split folio. This leads to a wrong folio +returned to userspace. 
+
+Backport of upstream commit 577a1f495fd78d8fb61b67ac3d3b595b01f6fcb0
+(merged in mainline v7.0-rc4, 2026-03; also applied to linux-6.18.y as
+08b2b65c63bb26dbb2a4e2adc2ce96e2929b8b60 on 2026-03-25). Adapted to
+Ubuntu's linux-hwe-6.17 6.17.0-22.22~24.04.1 tree, which predates the
+upstream __split_unmapped_folio() refactor that introduced SPLIT_TYPE_*
+and folio_split_supported(); the fix itself is identical in effect.
+
+As of 2026-04-20, the fix has NOT yet reached Ubuntu's linux-hwe-6.17
+package on noble (24.04). Reported against the IC GuestOS by
+Bas van Dijk; reproducer at
+https://github.com/dfinity/thp-madv-remove-test.
+
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+---
+ mm/huge_memory.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -3422,6 +3422,7 @@
+ {
+ 	int order = folio_order(folio);
+ 	int start_order = uniform_split ? new_order : order - 1;
++	struct folio *origin_folio = folio;
+ 	bool stop_split = false;
+ 	struct folio *next;
+ 	int split_order;
+@@ -3459,7 +3460,13 @@
+ 				xas_split(xas, folio, old_order);
+ 			else {
+ 				xas_set_order(xas, folio->index, split_order);
+-				xas_try_split(xas, folio, old_order);
++				/*
++				 * use the original folio, so that a
++				 * parallel folio_try_get() waits on it
++				 * until xarray is updated with after-split
++				 * folios and the original one is unfreezed
++				 */
++				xas_try_split(xas, origin_folio, old_order);
+ 				if (xas_error(xas)) {
+ 					ret = xas_error(xas);
+ 					stop_split = true;
+-- 
+2.51.0
diff --git a/ic-os/guestos/context/kernel-patches/README.md b/ic-os/guestos/context/kernel-patches/README.md
new file mode 100644
index 000000000000..e1b77b9ea839
--- /dev/null
+++ b/ic-os/guestos/context/kernel-patches/README.md
@@ -0,0 +1,25 @@
+# GuestOS kernel patches
+
+Patches in this directory are applied (in lexicographic order) to Ubuntu's
+`linux-hwe-6.17` source package in the `kernel-build` stage of
+`Dockerfile.base`. The resulting `.deb` packages replace the stock kernel in
+the final GuestOS base image.
+
+## Conventions
+
+- Name patches `NNNN-short-description.patch` so they apply in a deterministic
+  order.
+- Each patch file must be a single-commit `git format-patch` output and apply
+  with `patch -p1` from the root of the kernel source tree.
+- Include in the commit message: the upstream mainline commit SHA, the
+  upstream stable branch it has (or has not) landed on, and the reason for
+  carrying the patch locally.
+- Remove a patch once it is no longer necessary (i.e. the Ubuntu package in
+  use already contains the fix).
+
+## Current patches
+
+- `0001-mm-huge_memory-fix-folio_split-race-condition.patch` — backport of
+  upstream `577a1f495fd78d8fb61b67ac3d3b595b01f6fcb0` ("mm/huge_memory: fix a
+  folio_split() race condition with folio_try_get()"). Drop once the Ubuntu
+  kernel in use (`linux-hwe-6.17` or a newer HWE track) ships this fix.