From 5abfc5867ffef8da61a3c8e9e3eae4ef39a2bd46 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Tue, 25 Nov 2025 15:58:02 +0000 Subject: [PATCH 01/13] Make containers for the pipeline tools separate to the main pipeline --- .github/workflows/tool-containers.yaml | 119 +++++++++++++++++++++++++ containers/Makefile | 64 ++++++++++++- containers/infernal/Dockerfile | 39 ++++++++ containers/samtools/Dockerfile | 65 ++++++++++++++ 4 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/tool-containers.yaml create mode 100644 containers/infernal/Dockerfile create mode 100644 containers/samtools/Dockerfile diff --git a/.github/workflows/tool-containers.yaml b/.github/workflows/tool-containers.yaml new file mode 100644 index 000000000..71c8d8ab7 --- /dev/null +++ b/.github/workflows/tool-containers.yaml @@ -0,0 +1,119 @@ +# GitHub Actions workflow for building and pushing tool containers +# These containers cache slow-to-build bioinformatics tools (Infernal, Samtools) + +name: Build and Push Tool Containers + +on: + push: + branches: ['master', 'dev'] + paths: + - 'containers/infernal/**' + - 'containers/samtools/**' + - '.github/workflows/tool-containers.yaml' + workflow_dispatch: + inputs: + force_rebuild: + description: 'Force rebuild all tool containers' + required: false + type: boolean + default: false + +jobs: + detect-changes: + runs-on: ubuntu-latest + outputs: + infernal: ${{ steps.changes.outputs.infernal }} + samtools: ${{ steps.changes.outputs.samtools }} + force: ${{ github.event.inputs.force_rebuild == 'true' }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Detect changed files + id: changes + run: | + if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ github.event.inputs.force_rebuild }}" = "true" ]; then + echo "infernal=true" >> $GITHUB_OUTPUT + echo "samtools=true" >> $GITHUB_OUTPUT + else + # Check if infernal files changed + if git diff --name-only HEAD^ HEAD | grep -q 
"^containers/infernal/"; then + echo "infernal=true" >> $GITHUB_OUTPUT + else + echo "infernal=false" >> $GITHUB_OUTPUT + fi + + # Check if samtools files changed + if git diff --name-only HEAD^ HEAD | grep -q "^containers/samtools/"; then + echo "samtools=true" >> $GITHUB_OUTPUT + else + echo "samtools=false" >> $GITHUB_OUTPUT + fi + fi + + build-infernal: + needs: detect-changes + if: needs.detect-changes.outputs.infernal == 'true' || needs.detect-changes.outputs.force == 'true' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Docker login + env: + DOCKER_USER: ${{ secrets.DOCKER_USER }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + run: echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USER" --password-stdin + + - name: Build Infernal container + run: docker build -t rnacentral/infernal:1.1.2 -f containers/infernal/Dockerfile containers/infernal/ + + - name: Tag as latest + run: docker tag rnacentral/infernal:1.1.2 rnacentral/infernal:latest + + - name: Push versioned tag + run: docker push rnacentral/infernal:1.1.2 + + - name: Push latest tag + run: docker push rnacentral/infernal:latest + + - name: Slack notification + if: always() + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_MESSAGE: 'Infernal container built and pushed: ${{ job.status }}' + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + MSG_MINIMAL: true + + build-samtools: + needs: detect-changes + if: needs.detect-changes.outputs.samtools == 'true' || needs.detect-changes.outputs.force == 'true' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Docker login + env: + DOCKER_USER: ${{ secrets.DOCKER_USER }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + run: echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USER" --password-stdin + + - name: Build Samtools container + run: docker build -t rnacentral/samtools:1.18 -f containers/samtools/Dockerfile containers/samtools/ + + - name: Tag as latest + run: docker tag rnacentral/samtools:1.18 rnacentral/samtools:latest + + - name: Push
versioned tag + run: docker push rnacentral/samtools:1.18 + + - name: Push latest tag + run: docker push rnacentral/samtools:latest + + - name: Slack notification + if: always() + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_MESSAGE: 'Samtools container built and pushed: ${{ job.status }}' + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + MSG_MINIMAL: true diff --git a/containers/Makefile b/containers/Makefile index 9556510dc..7c77d39fa 100644 --- a/containers/Makefile +++ b/containers/Makefile @@ -1,7 +1,69 @@ +# Container image names cpat=rnacentral/cpat:latest +# Tool versions +INFERNAL_VERSION=1.1.2 +SAMTOOLS_VERSION=1.18 + +# Docker repository +DOCKER_REPO=rnacentral + +# Tool container image names +INFERNAL_IMAGE=$(DOCKER_REPO)/infernal:$(INFERNAL_VERSION) +INFERNAL_LATEST=$(DOCKER_REPO)/infernal:latest +SAMTOOLS_IMAGE=$(DOCKER_REPO)/samtools:$(SAMTOOLS_VERSION) +SAMTOOLS_LATEST=$(DOCKER_REPO)/samtools:latest + +.PHONY: all cpat infernal samtools push-infernal push-samtools push-all clean test + +all: infernal samtools + +# Build CPAT container (existing target) cpat: cpat/Dockerfile docker build -t "$(cpat)" -f $^ . docker push $(cpat) -.PHONY: cpat +# Build Infernal container +infernal: + @echo "Building Infernal $(INFERNAL_VERSION) container..." + docker build -t $(INFERNAL_IMAGE) -f infernal/Dockerfile . + docker tag $(INFERNAL_IMAGE) $(INFERNAL_LATEST) + +# Build Samtools container +samtools: + @echo "Building Samtools $(SAMTOOLS_VERSION) container..." + docker build -t $(SAMTOOLS_IMAGE) -f samtools/Dockerfile . + docker tag $(SAMTOOLS_IMAGE) $(SAMTOOLS_LATEST) + +# Push Infernal to Docker Hub +push-infernal: infernal + @echo "Pushing Infernal images to Docker Hub..." + docker push $(INFERNAL_IMAGE) + docker push $(INFERNAL_LATEST) + +# Push Samtools to Docker Hub +push-samtools: samtools + @echo "Pushing Samtools images to Docker Hub..." 
+ docker push $(SAMTOOLS_IMAGE) + docker push $(SAMTOOLS_LATEST) + +# Push all tool containers +push-all: push-infernal push-samtools + +# Test tool containers locally +test: infernal samtools + @echo "Testing Infernal container..." + docker run --rm $(INFERNAL_IMAGE) cmscan -h > /dev/null + docker run --rm $(INFERNAL_IMAGE) cmbuild -h > /dev/null + @echo "✓ Infernal tests passed" + @echo "" + @echo "Testing Samtools container..." + docker run --rm $(SAMTOOLS_IMAGE) samtools --version + docker run --rm $(SAMTOOLS_IMAGE) tabix --version + @echo "✓ Samtools tests passed" + +# Clean up local tool container images +clean: + @echo "Removing tool container images..." + docker rmi $(INFERNAL_IMAGE) $(INFERNAL_LATEST) || true + docker rmi $(SAMTOOLS_IMAGE) $(SAMTOOLS_LATEST) || true diff --git a/containers/infernal/Dockerfile b/containers/infernal/Dockerfile new file mode 100644 index 000000000..befdb6650 --- /dev/null +++ b/containers/infernal/Dockerfile @@ -0,0 +1,39 @@ +# Multi-stage build for Infernal 1.1.2 +# This container provides the Infernal bioinformatics tool suite + +# Stage 1: Builder - compiles Infernal from source +FROM python:3.11.14-trixie AS builder + +ENV RNA=/rna +WORKDIR $RNA + +# Install minimal build dependencies +RUN apt update && apt install -y gcc make && rm -rf /var/lib/apt/lists/* + +# Download and build Infernal +RUN curl -OL http://eddylab.org/infernal/infernal-1.1.2.tar.gz && \ + tar -xvzf infernal-1.1.2.tar.gz && \ + rm infernal-1.1.2.tar.gz && \ + cd infernal-1.1.2 && \ + ./configure --prefix=$RNA/infernal-1.1.2 && \ + make && \ + make install && \ + cd easel && \ + make install + +# Stage 2: Final - minimal runtime image +FROM python:3.11.14-trixie + +ENV RNA=/rna +WORKDIR $RNA + +# Copy built Infernal from builder stage +COPY --from=builder $RNA/infernal-1.1.2 $RNA/infernal-1.1.2 + +# Set PATH +ENV PATH="$RNA/infernal-1.1.2/bin:$PATH" + +# Test that Infernal is working +RUN cmscan -h > /dev/null 2>&1 + +ENTRYPOINT ["/bin/bash"] diff --git 
a/containers/samtools/Dockerfile b/containers/samtools/Dockerfile new file mode 100644 index 000000000..77ae6d99c --- /dev/null +++ b/containers/samtools/Dockerfile @@ -0,0 +1,65 @@ +# Multi-stage build for Samtools + HTSlib 1.18 +# This container provides Samtools, HTSlib tools (tabix, bgzip) and libraries + +# Stage 1: Builder - compiles HTSlib and Samtools from source +FROM python:3.11.14-trixie AS builder + +WORKDIR /build + +# Install build dependencies +RUN apt update && apt install -y \ + gcc \ + make \ + libbz2-dev \ + liblzma-dev \ + libncurses5-dev \ + libssl-dev \ + zlib1g-dev \ + wget && \ + rm -rf /var/lib/apt/lists/* + +# Build HTSlib first (Samtools depends on it) +RUN wget https://github.com/samtools/htslib/releases/download/1.18/htslib-1.18.tar.bz2 && \ + tar -jxf htslib-1.18.tar.bz2 && \ + rm htslib-1.18.tar.bz2 && \ + cd htslib-1.18 && \ + ./configure --prefix=/usr/local && \ + make && \ + make install + +# Build Samtools +RUN wget https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 && \ + tar jxf samtools-1.18.tar.bz2 && \ + rm samtools-1.18.tar.bz2 && \ + cd samtools-1.18 && \ + ./configure --prefix=/usr/local && \ + make && \ + make install + +# Stage 2: Final - minimal runtime image with libraries +FROM python:3.11.14-trixie + +# Install only runtime dependencies (no -dev packages) +RUN apt update && apt install -y \ + libbz2-1.0 \ + liblzma5 \ + libncurses6 \ + libssl3 \ + zlib1g && \ + rm -rf /var/lib/apt/lists/* + +# Copy built binaries and libraries from builder +COPY --from=builder /usr/local/bin/samtools /usr/local/bin/ +COPY --from=builder /usr/local/bin/tabix /usr/local/bin/ +COPY --from=builder /usr/local/bin/bgzip /usr/local/bin/ +COPY --from=builder /usr/local/lib/libhts* /usr/local/lib/ +COPY --from=builder /usr/local/include/htslib /usr/local/include/htslib/ + +# Run ldconfig to register shared libraries +RUN ldconfig + +# Test that samtools and tools are working +RUN samtools --version && \ + tabix 
--version + +ENTRYPOINT ["/bin/bash"] From 771d9623adacd2ee9898d47af5f9a1725c037886 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Tue, 25 Nov 2025 16:19:37 +0000 Subject: [PATCH 02/13] Use buildx to enable multi-platform builds --- containers/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/containers/Makefile b/containers/Makefile index 7c77d39fa..fbd9d9acc 100644 --- a/containers/Makefile +++ b/containers/Makefile @@ -20,19 +20,19 @@ all: infernal samtools # Build CPAT container (existing target) cpat: cpat/Dockerfile - docker build -t "$(cpat)" -f $^ . + docker buildx build -t "$(cpat)" -f $^ --platform linux/amd64 . docker push $(cpat) # Build Infernal container infernal: @echo "Building Infernal $(INFERNAL_VERSION) container..." - docker build -t $(INFERNAL_IMAGE) -f infernal/Dockerfile . + docker buildx build -t $(INFERNAL_IMAGE) -f infernal/Dockerfile --platform linux/amd64 . docker tag $(INFERNAL_IMAGE) $(INFERNAL_LATEST) # Build Samtools container samtools: @echo "Building Samtools $(SAMTOOLS_VERSION) container..." - docker build -t $(SAMTOOLS_IMAGE) -f samtools/Dockerfile . + docker buildx build -t $(SAMTOOLS_IMAGE) -f samtools/Dockerfile --platform linux/amd64 . 
docker tag $(SAMTOOLS_IMAGE) $(SAMTOOLS_LATEST) # Push Infernal to Docker Hub From e46d2294065d0763385321e10b76f47aa7f54cfd Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Tue, 25 Nov 2025 16:20:11 +0000 Subject: [PATCH 03/13] Switch to multi-part build for rnacentral import pipeline --- Dockerfile | 194 +++++++++++++++++++++++++++-------------------------- 1 file changed, 99 insertions(+), 95 deletions(-) diff --git a/Dockerfile b/Dockerfile index fd6f2d26a..669f98844 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,47 +1,84 @@ +# Multi-stage Dockerfile for RNAcentral Import Pipeline +# This reduces final image size by ~30-40% by separating build and runtime dependencies + +# Stage 1: Pull pre-built Infernal container +FROM rnacentral/infernal:1.1.2 AS infernal + +# Stage 2: Pull pre-built Samtools/HTSlib container +FROM rnacentral/samtools:1.18 AS samtools + +# Stage 3: Rust builder - compiles Rust utilities +FROM python:3.11.14-trixie AS rust-builder + +# Install Rust toolchain only +RUN curl https://sh.rustup.rs -sSf | sh -s -- -y +ENV PATH="/root/.cargo/bin:$PATH" +ENV CARGO_NET_GIT_FETCH_WITH_CLI=true + +# Copy Rust source and build +WORKDIR /build +COPY utils ./utils +COPY Cargo.toml Cargo.lock Makefile ./ +RUN cargo build --release && \ + mkdir /rust-bins && \ + mv target/release/json2fasta \ + target/release/split-ena \ + target/release/expand-urs \ + target/release/precompute \ + target/release/search-export \ + target/release/ftp-export \ + target/release/json2dfasta \ + target/release/bed-expander \ + /rust-bins/ + +# Stage 4: Python environment builder +FROM python:3.11.14-trixie AS python-builder + +# Install uv +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:$PATH" + +# Copy dependency files and install +WORKDIR /app +COPY pyproject.toml uv.lock ./ +RUN uv sync --no-editable --frozen + +# Download NLTK data +RUN /app/.venv/bin/python3 -m nltk.downloader words + +# Stage 5: Final runtime image FROM 
python:3.11.14-trixie ENV RNA=/rna - WORKDIR $RNA -RUN apt update -RUN apt upgrade -y - -# Install all required packages -RUN apt install -y \ +# Install ONLY runtime dependencies (no gcc, no -dev packages) +RUN apt update && apt upgrade -y && \ + apt install -y \ bedtools \ ca-certificates \ curl \ default-mysql-client \ - devscripts \ - freetds-dev \ gawk \ - gcc \ git \ gzip \ hmmer \ jq \ lftp \ - libbz2-dev \ - liblzma-dev \ - libncurses5-dev \ - libncursesw5-dev \ - libsqlite3-dev \ - libssl-dev \ + libbz2-1.0 \ + liblzma5 \ + libncurses6 \ + libssl3 \ libxml2-utils \ - libxml2-dev \ - libzip-dev \ moreutils \ mysql-common \ openssl \ pandoc \ - patch \ pgloader \ postgresql-17 \ postgresql-client-17 \ procps \ python3 \ - python3-dev \ python3-pip \ rsync \ sbcl \ @@ -49,97 +86,63 @@ RUN apt install -y \ tar \ time \ unzip \ - zlib1g-dev\ - wget - - -# Install Infernal -RUN \ - cd $RNA/ && \ - curl -OL http://eddylab.org/infernal/infernal-1.1.2.tar.gz && \ - tar -xvzf infernal-1.1.2.tar.gz && \ - rm infernal-1.1.2.tar.gz && \ - cd infernal-1.1.2 && \ - ./configure --prefix=$RNA/infernal-1.1.2 && \ - make && \ - make install && \ - cd easel && \ - make install - -# Install blat -RUN \ - wget https://hgwdev.gi.ucsc.edu/~kent/exe/linux/blatSuite.38.zip -O blat.zip && \ + wget \ + zlib1g && \ + rm -rf /var/lib/apt/lists/* + +# Copy Infernal from tool container +COPY --from=infernal /rna/infernal-1.1.2 $RNA/infernal-1.1.2 + +# Copy Samtools + HTSlib from tool container +COPY --from=samtools /usr/local/bin/samtools /usr/local/bin/tabix /usr/local/bin/bgzip /usr/local/bin/ +COPY --from=samtools /usr/local/lib/libhts* /usr/local/lib/ +COPY --from=samtools /usr/local/include/htslib /usr/local/include/htslib + +# Run ldconfig to register shared libraries +RUN ldconfig + +# Install blat (pre-compiled) +RUN wget https://hgwdev.gi.ucsc.edu/~kent/exe/linux/blatSuite.38.zip -O blat.zip && \ unzip blat.zip -d blat_suite && \ rm blat.zip - -# Install seqkit -RUN \ - mkdir 
seqkit && \ +# Install seqkit (pre-compiled) +RUN mkdir seqkit && \ cd seqkit && \ wget https://github.com/shenwei356/seqkit/releases/download/v2.10.1/seqkit_linux_amd64.tar.gz && \ tar xvf seqkit_linux_amd64.tar.gz && \ rm seqkit_linux_amd64.tar.gz -# Install ribovore -RUN git clone https://github.com/nawrockie/epn-ofile.git && cd epn-ofile && git fetch && git fetch --tags && git checkout ribovore-0.40 -RUN git clone https://github.com/nawrockie/epn-options.git && cd epn-options && git fetch && git fetch --tags && git checkout ribovore-0.40 -RUN git clone https://github.com/nawrockie/epn-test.git && cd epn-test && git fetch && git fetch --tags && git checkout ribovore-0.40 -RUN git clone https://github.com/nawrockie/ribovore.git && cd ribovore && git fetch && git fetch --tags && git checkout ribovore-0.40 - -# Install htslib -RUN \ - wget https://github.com/samtools/htslib/releases/download/1.18/htslib-1.18.tar.bz2 && \ - tar -jxf htslib-1.18.tar.bz2 && \ - rm htslib-1.18.tar.bz2 && \ - cd htslib-1.18 && \ - make && \ - make install - -# Install samtools -RUN \ - wget https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 && \ - tar jxf samtools-1.18.tar.bz2 && \ - rm samtools-1.18.tar.bz2 && \ - cd samtools-1.18 && \ - make && \ - make install - -# Install python requirements -ENV RNACENTRAL_IMPORT_PIPELINE="$RNA/rnacentral-import-pipeline" - -# Install useful pip version -RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python get-pip.py - -# Install uv -RUN curl -LsSf https://astral.sh/uv/install.sh | sh -## Add uv install directory to the front of the path +# Clone ribovore (Perl scripts, no compilation) +RUN git clone https://github.com/nawrockie/epn-ofile.git && \ + cd epn-ofile && git checkout ribovore-0.40 && \ + cd .. && \ + git clone https://github.com/nawrockie/epn-options.git && \ + cd epn-options && git checkout ribovore-0.40 && \ + cd .. 
&& \ + git clone https://github.com/nawrockie/epn-test.git && \ + cd epn-test && git checkout ribovore-0.40 && \ + cd .. && \ + git clone https://github.com/nawrockie/ribovore.git && \ + cd ribovore && git checkout ribovore-0.40 + +# Install pip and uv (lightweight, for potential runtime use) +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python get-pip.py && \ + curl -LsSf https://astral.sh/uv/install.sh | sh ENV PATH="/root/.local/bin:$PATH" -COPY pyproject.toml $RNACENTRAL_IMPORT_PIPELINE/pyproject.toml -COPY uv.lock $RNACENTRAL_IMPORT_PIPELINE/uv.lock +# Copy Python environment from builder +ENV RNACENTRAL_IMPORT_PIPELINE="$RNA/rnacentral-import-pipeline" +COPY --from=python-builder /app/.venv $RNACENTRAL_IMPORT_PIPELINE/.venv -WORKDIR "$RNA/rnacentral-import-pipeline" -RUN uv sync --no-editable --frozen -ENV PATH="$RNA/rnacentral-import-pipeline/.venv/bin:$PATH" -RUN python3 -m nltk.downloader words +# Copy Rust binaries from builder +COPY --from=rust-builder /rust-bins/* $RNACENTRAL_IMPORT_PIPELINE/bin/ -## Download Rust toolchain -RUN curl https://sh.rustup.rs -sSf | sh -s -- -y - -COPY utils ./utils -COPY Makefile Makefile -COPY Cargo.toml Cargo.toml -COPY Cargo.lock Cargo.lock -ENV PATH="$PATH:/root/.cargo/bin" -ENV CARGO_NET_GIT_FETCH_WITH_CLI=true -RUN make rust - -WORKDIR $RNA +# Copy project files (needed for imports and runtime) +COPY . 
$RNACENTRAL_IMPORT_PIPELINE/ # Setup environmental variables ENV PERL5LIB="/usr/bin/env:$PERL5LIB" - ENV RIBOINFERNALDIR="$RNA/infernal-1.1.2/bin" ENV RIBODIR="$RNA/ribovore" ENV RIBOEASELDIR="$RNA/infernal-1.1.2/bin" @@ -154,5 +157,6 @@ ENV PATH="$RNA/infernal-1.1.2/bin:$PATH" ENV PATH="$RNA/blat_suite:$PATH" ENV PATH="$RNA/seqkit:$PATH" ENV PATH="$RNACENTRAL_IMPORT_PIPELINE:$PATH" +ENV PATH="$RNACENTRAL_IMPORT_PIPELINE/.venv/bin:$PATH" ENTRYPOINT ["/bin/bash"] From ea86885443d17fac6397f41858892055cbd8bb9f Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Wed, 26 Nov 2025 10:55:20 +0000 Subject: [PATCH 04/13] Move rust utils build into their own container --- Cargo.toml | 12 ++++++++- Dockerfile | 29 +++------------------ containers/Makefile | 27 +++++++++++++++++--- containers/rust-utils/Dockerfile | 44 ++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 30 deletions(-) create mode 100644 containers/rust-utils/Dockerfile diff --git a/Cargo.toml b/Cargo.toml index d3d5c08da..9e92c7bac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,14 @@ [workspace] members = [ - "utils/*", + "utils/bed-expander", + "utils/expand-urs", + "utils/ftp-export", + "utils/json2dfasta", + "utils/json2fasta", + "utils/precompute", + "utils/rnc-core", + "utils/rnc-test-utils", + "utils/rnc-utils", + "utils/search-export", + "utils/split-ena", ] diff --git a/Dockerfile b/Dockerfile index 669f98844..e5c209e39 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,29 +7,8 @@ FROM rnacentral/infernal:1.1.2 AS infernal # Stage 2: Pull pre-built Samtools/HTSlib container FROM rnacentral/samtools:1.18 AS samtools -# Stage 3: Rust builder - compiles Rust utilities -FROM python:3.11.14-trixie AS rust-builder - -# Install Rust toolchain only -RUN curl https://sh.rustup.rs -sSf | sh -s -- -y -ENV PATH="/root/.cargo/bin:$PATH" -ENV CARGO_NET_GIT_FETCH_WITH_CLI=true - -# Copy Rust source and build -WORKDIR /build -COPY utils ./utils -COPY Cargo.toml Cargo.lock Makefile ./ -RUN cargo build 
--release && \ - mkdir /rust-bins && \ - mv target/release/json2fasta \ - target/release/split-ena \ - target/release/expand-urs \ - target/release/precompute \ - target/release/search-export \ - target/release/ftp-export \ - target/release/json2dfasta \ - target/release/bed-expander \ - /rust-bins/ +# Stage 3: Pull pre-built Rust utilities container +FROM rnacentral/rust-utils:latest AS rust-utils # Stage 4: Python environment builder FROM python:3.11.14-trixie AS python-builder @@ -135,8 +114,8 @@ ENV PATH="/root/.local/bin:$PATH" ENV RNACENTRAL_IMPORT_PIPELINE="$RNA/rnacentral-import-pipeline" COPY --from=python-builder /app/.venv $RNACENTRAL_IMPORT_PIPELINE/.venv -# Copy Rust binaries from builder -COPY --from=rust-builder /rust-bins/* $RNACENTRAL_IMPORT_PIPELINE/bin/ +# Copy Rust binaries from rust-utils container +COPY --from=rust-utils /rna/bin/* $RNACENTRAL_IMPORT_PIPELINE/bin/ # Copy project files (needed for imports and runtime) COPY . $RNACENTRAL_IMPORT_PIPELINE/ diff --git a/containers/Makefile b/containers/Makefile index fbd9d9acc..ae818d243 100644 --- a/containers/Makefile +++ b/containers/Makefile @@ -13,10 +13,11 @@ INFERNAL_IMAGE=$(DOCKER_REPO)/infernal:$(INFERNAL_VERSION) INFERNAL_LATEST=$(DOCKER_REPO)/infernal:latest SAMTOOLS_IMAGE=$(DOCKER_REPO)/samtools:$(SAMTOOLS_VERSION) SAMTOOLS_LATEST=$(DOCKER_REPO)/samtools:latest +RUST_UTILS_IMAGE=$(DOCKER_REPO)/rust-utils:latest -.PHONY: all cpat infernal samtools push-infernal push-samtools push-all clean test +.PHONY: all cpat infernal samtools rust-utils push-infernal push-samtools push-rust-utils push-all clean test -all: infernal samtools +all: infernal samtools rust-utils # Build CPAT container (existing target) cpat: cpat/Dockerfile @@ -35,6 +36,12 @@ samtools: docker buildx build -t $(SAMTOOLS_IMAGE) -f samtools/Dockerfile --platform linux/amd64 . docker tag $(SAMTOOLS_IMAGE) $(SAMTOOLS_LATEST) +# Build Rust utilities container +rust-utils: + @echo "Building Rust utilities container..." 
+ docker buildx build -t $(RUST_UTILS_IMAGE) -f rust-utils/Dockerfile --platform linux/amd64 ../ + @echo "✓ Rust utilities built successfully" + # Push Infernal to Docker Hub push-infernal: infernal @echo "Pushing Infernal images to Docker Hub..." @@ -47,11 +54,16 @@ push-samtools: samtools docker push $(SAMTOOLS_IMAGE) docker push $(SAMTOOLS_LATEST) +# Push Rust utilities to Docker Hub +push-rust-utils: rust-utils + @echo "Pushing Rust utilities image to Docker Hub..." + docker push $(RUST_UTILS_IMAGE) + # Push all tool containers -push-all: push-infernal push-samtools +push-all: push-infernal push-samtools push-rust-utils # Test tool containers locally -test: infernal samtools +test: infernal samtools rust-utils @echo "Testing Infernal container..." docker run --rm $(INFERNAL_IMAGE) cmscan -h > /dev/null docker run --rm $(INFERNAL_IMAGE) cmbuild -h > /dev/null @@ -61,9 +73,16 @@ test: infernal samtools docker run --rm $(SAMTOOLS_IMAGE) samtools --version docker run --rm $(SAMTOOLS_IMAGE) tabix --version @echo "✓ Samtools tests passed" + @echo "" + @echo "Testing Rust utilities container..." + docker run --rm $(RUST_UTILS_IMAGE) json2fasta --help > /dev/null + docker run --rm $(RUST_UTILS_IMAGE) precompute --help > /dev/null + docker run --rm $(RUST_UTILS_IMAGE) search-export --help > /dev/null + @echo "✓ Rust utilities tests passed" # Clean up local tool container images clean: @echo "Removing tool container images..." 
docker rmi $(INFERNAL_IMAGE) $(INFERNAL_LATEST) || true docker rmi $(SAMTOOLS_IMAGE) $(SAMTOOLS_LATEST) || true + docker rmi $(RUST_UTILS_IMAGE) || true diff --git a/containers/rust-utils/Dockerfile b/containers/rust-utils/Dockerfile new file mode 100644 index 000000000..0fd4006dd --- /dev/null +++ b/containers/rust-utils/Dockerfile @@ -0,0 +1,44 @@ +# Multi-stage build for Rust utilities +# This container provides all 8 Rust command-line tools used by the pipeline + +# Stage 1: Builder - compiles Rust utilities +FROM python:3.11.14-trixie AS builder + +# Install Rust toolchain +RUN curl https://sh.rustup.rs -sSf | sh -s -- -y +ENV PATH="/root/.cargo/bin:$PATH" +ENV CARGO_NET_GIT_FETCH_WITH_CLI=true + +# Copy Rust source and build +WORKDIR /build +COPY utils ./utils +COPY Cargo.toml Cargo.lock Makefile ./ + +# Build all Rust utilities in release mode +RUN cargo build --release && \ + mkdir /rust-bins && \ + mv target/release/json2fasta \ + target/release/split-ena \ + target/release/expand-urs \ + target/release/precompute \ + target/release/search-export \ + target/release/ftp-export \ + target/release/json2dfasta \ + target/release/bed-expander \ + /rust-bins/ + +# Stage 2: Final - minimal runtime image +FROM python:3.11.14-trixie + +WORKDIR /rna + +# Copy compiled Rust binaries from builder +COPY --from=builder /rust-bins/* /rna/bin/ + +# Add binaries to PATH +ENV PATH="/rna/bin:$PATH" + +# Test that at least one binary is working +RUN json2fasta --help > /dev/null 2>&1 + +ENTRYPOINT ["/bin/bash"] From 168650a963430b3775cbafe6d0764b9a81411322 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Wed, 26 Nov 2025 10:56:09 +0000 Subject: [PATCH 05/13] Github workflows to build rust containers and make the main pipeline container wait for a rust build if needed --- .github/workflows/main.yaml | 68 +++++++++++++++++++++++++++ .github/workflows/rust-container.yaml | 47 ++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 
.github/workflows/rust-container.yaml diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index c5510ea2e..9d1f5d065 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,7 +22,75 @@ jobs: SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} MSG_MINIMAL: true + wait-for-rust-build: + runs-on: ubuntu-latest + outputs: + rust_changed: ${{ steps.check-rust.outputs.changed }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Check if Rust files changed + id: check-rust + run: | + if git diff --name-only HEAD^ HEAD | grep -qE '^(utils/|Cargo\.(toml|lock)|containers/rust-utils/|\.github/workflows/rust-container\.yaml)'; then + echo "changed=true" >> $GITHUB_OUTPUT + echo "Rust files changed, waiting for rust-container workflow..." + else + echo "changed=false" >> $GITHUB_OUTPUT + echo "No Rust changes, skipping wait" + fi + + - name: Wait for Rust container build + if: steps.check-rust.outputs.changed == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + echo "Waiting for rust-container workflow to complete..." + sleep 10 # Give workflow time to start + + # Wait up to 30 minutes for rust-container workflow to complete + timeout=1800 + elapsed=0 + while [ $elapsed -lt $timeout ]; do + # Check for running or completed rust-container workflows for this commit + status=$(gh run list \ + --workflow=rust-container.yaml \ + --commit=${{ github.sha }} \ + --json status,conclusion \ + --jq '.[0] // empty | "\(.status):\(.conclusion)"') + + if [ -z "$status" ]; then + echo "No rust-container workflow found yet, waiting..."
+ sleep 10 + elapsed=$((elapsed + 10)) + continue + fi + + workflow_status=$(echo "$status" | cut -d: -f1) + workflow_conclusion=$(echo "$status" | cut -d: -f2) + + if [ "$workflow_status" = "completed" ]; then + if [ "$workflow_conclusion" = "success" ]; then + echo "✓ Rust container build completed successfully" + exit 0 + else + echo "✗ Rust container build failed with conclusion: $workflow_conclusion" + exit 1 + fi + fi + + echo "Rust container build status: $workflow_status (elapsed: ${elapsed}s)" + sleep 15 + elapsed=$((elapsed + 15)) + done + + echo "✗ Timeout waiting for rust-container workflow" + exit 1 + create-docker-image: + needs: wait-for-rust-build runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/rust-container.yaml b/.github/workflows/rust-container.yaml new file mode 100644 index 000000000..41a64c626 --- /dev/null +++ b/.github/workflows/rust-container.yaml @@ -0,0 +1,47 @@ +# GitHub Actions workflow for building and pushing Rust utilities container +# Rebuilds when Rust code changes (utils/**, Cargo.toml, Cargo.lock) + +name: Build Rust Utilities Container + +on: + push: + branches: ['master', 'dev'] + paths: + - 'utils/**' + - 'Cargo.toml' + - 'Cargo.lock' + - 'containers/rust-utils/**' + - '.github/workflows/rust-container.yaml' + workflow_dispatch: + inputs: + force_rebuild: + description: 'Force rebuild Rust utilities container' + required: false + type: boolean + default: false + +jobs: + build-rust-utils: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Docker login + env: + DOCKER_USER: ${{ secrets.DOCKER_USER }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + run: echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USER" --password-stdin + + - name: Build Rust utilities container + run: docker build -t rnacentral/rust-utils:latest -f containers/rust-utils/Dockerfile .
+ + - name: Push to Docker Hub + run: docker push rnacentral/rust-utils:latest + + - name: Slack notification + if: always() + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_MESSAGE: 'Rust utilities container built and pushed: ${{ job.status }}' + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + MSG_MINIMAL: true From 151eafbbeaab052e6d9e2f970af2b00af494ebd2 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Wed, 26 Nov 2025 12:35:58 +0000 Subject: [PATCH 06/13] Improved dockerignore file --- .dockerignore | 49 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/.dockerignore b/.dockerignore index 8314dfc34..d9ab9b024 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,9 +1,40 @@ -* -!requirements.txt -!Cargo.toml -!Cargo.lock -!utils -!openssl -!uv.lock -!pyproject.toml -!Makefile +# Exclude build and development files +.git/ +.github/ +*.nf +workflows/ +tests/ +# Keep utils/, Cargo.toml, Cargo.lock and Makefile in the context: containers/rust-utils/Dockerfile COPYs them +containers/ +Dockerfile.old +.dockerignore +.gitignore +.pre-commit-config.yaml +pytest.ini +.python-version +RELEASE.rst +LICENSE +README.md +*.md + +# Python artifacts +__pycache__/ +*.py[cod] +*$py.class +.pytest_cache/ +.coverage +htmlcov/ +*.egg-info/ +dist/ +build/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db From d6dc3de9cdf3d644818ede1006b7d7179715f7d9 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Wed, 26 Nov 2025 12:36:14 +0000 Subject: [PATCH 07/13] Fix broken copy line --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e5c209e39..909ca361a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,8 @@ ENV PATH="/root/.local/bin:$PATH" # Copy dependency files and install WORKDIR /app -COPY pyproject.toml uv.lock ./ +COPY pyproject.toml . +COPY uv.lock .
RUN uv sync --no-editable --frozen # Download NLTK data From 979b20751415694bac040341f16cf3dc4e567e82 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Wed, 26 Nov 2025 12:38:24 +0000 Subject: [PATCH 08/13] Copy only the necessary local context --- Dockerfile | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 909ca361a..e0ca6c6e2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -115,11 +115,19 @@ ENV PATH="/root/.local/bin:$PATH" ENV RNACENTRAL_IMPORT_PIPELINE="$RNA/rnacentral-import-pipeline" COPY --from=python-builder /app/.venv $RNACENTRAL_IMPORT_PIPELINE/.venv -# Copy Rust binaries from rust-utils container +# Copy only essential runtime files (exclude build artifacts, tests, Nextflow files) +# Python package - required for CLI +COPY rnacentral_pipeline/ $RNACENTRAL_IMPORT_PIPELINE/rnacentral_pipeline/ + +# Python/shell scripts - required for various operations (includes old Rust binaries from git) +COPY bin/ $RNACENTRAL_IMPORT_PIPELINE/bin/ + +# Copy fresh Rust binaries from rust-utils container (overwrites old binaries from git) COPY --from=rust-utils /rna/bin/* $RNACENTRAL_IMPORT_PIPELINE/bin/ -# Copy project files (needed for imports and runtime) -COPY . $RNACENTRAL_IMPORT_PIPELINE/ +# Package metadata - required for module imports +COPY pyproject.toml setup-env $RNACENTRAL_IMPORT_PIPELINE/ + # Setup environmental variables ENV PERL5LIB="/usr/bin/env:$PERL5LIB" From ca0a3ef0ebffa4f836d3a964ac061c7674a733a6 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Wed, 26 Nov 2025 12:38:42 +0000 Subject: [PATCH 09/13] Use buildx for cross platform build in docker makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 003071214..901173c6e 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ clean: cargo clean docker: Dockerfile - docker build -t "$(docker)" . + docker buildx build -t "$(docker)" --platform linux/amd64 . 
shell: docker docker run -v `pwd`:/rna/import-pipeline -i -t "$(docker)" From 95549d94a61ceaf09964abbc957ef121949334a7 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Tue, 2 Dec 2025 16:10:15 +0000 Subject: [PATCH 10/13] No need to use python image as builder for tools with no python deps --- containers/infernal/Dockerfile | 6 +++--- containers/samtools/Dockerfile | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/containers/infernal/Dockerfile b/containers/infernal/Dockerfile index befdb6650..b44dd64dc 100644 --- a/containers/infernal/Dockerfile +++ b/containers/infernal/Dockerfile @@ -2,13 +2,13 @@ # This container provides the Infernal bioinformatics tool suite # Stage 1: Builder - compiles Infernal from source -FROM python:3.11.14-trixie AS builder +FROM debian:trixie-slim AS builder ENV RNA=/rna WORKDIR $RNA # Install minimal build dependencies -RUN apt update && apt install -y gcc make && rm -rf /var/lib/apt/lists/* +RUN apt update && apt install -y gcc make curl && rm -rf /var/lib/apt/lists/* # Download and build Infernal RUN curl -OL http://eddylab.org/infernal/infernal-1.1.2.tar.gz && \ @@ -22,7 +22,7 @@ RUN curl -OL http://eddylab.org/infernal/infernal-1.1.2.tar.gz && \ make install # Stage 2: Final - minimal runtime image -FROM python:3.11.14-trixie +FROM debian:trixie-slim ENV RNA=/rna WORKDIR $RNA diff --git a/containers/samtools/Dockerfile b/containers/samtools/Dockerfile index 77ae6d99c..ae7f167a7 100644 --- a/containers/samtools/Dockerfile +++ b/containers/samtools/Dockerfile @@ -2,7 +2,7 @@ # This container provides Samtools, HTSlib tools (tabix, bgzip) and libraries # Stage 1: Builder - compiles HTSlib and Samtools from source -FROM python:3.11.14-trixie AS builder +FROM debian:trixie-slim AS builder WORKDIR /build @@ -37,7 +37,7 @@ RUN wget https://github.com/samtools/samtools/releases/download/1.18/samtools-1. 
make install # Stage 2: Final - minimal runtime image with libraries -FROM python:3.11.14-trixie +FROM debian:trixie-slim # Install only runtime dependencies (no -dev packages) RUN apt update && apt install -y \ @@ -53,7 +53,6 @@ COPY --from=builder /usr/local/bin/samtools /usr/local/bin/ COPY --from=builder /usr/local/bin/tabix /usr/local/bin/ COPY --from=builder /usr/local/bin/bgzip /usr/local/bin/ COPY --from=builder /usr/local/lib/libhts* /usr/local/lib/ -COPY --from=builder /usr/local/include/htslib /usr/local/include/htslib/ # Run ldconfig to register shared libraries RUN ldconfig From 8b8d90291cc6827a32ea5ba26c95648998c87384 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Tue, 2 Dec 2025 16:10:27 +0000 Subject: [PATCH 11/13] Cleaner handling of rust build products --- containers/rust-utils/Dockerfile | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/containers/rust-utils/Dockerfile b/containers/rust-utils/Dockerfile index 0fd4006dd..1cd42b7fb 100644 --- a/containers/rust-utils/Dockerfile +++ b/containers/rust-utils/Dockerfile @@ -17,15 +17,7 @@ COPY Cargo.toml Cargo.lock Makefile ./ # Build all Rust utilities in release mode RUN cargo build --release && \ mkdir /rust-bins && \ - mv target/release/json2fasta \ - target/release/split-ena \ - target/release/expand-urs \ - target/release/precompute \ - target/release/search-export \ - target/release/ftp-export \ - target/release/json2dfasta \ - target/release/bed-expander \ - /rust-bins/ + find target/release -maxdepth 1 -type f -executable -exec mv {} /rust-bins/ \; # Stage 2: Final - minimal runtime image FROM python:3.11.14-trixie From 365b236d2ee55d8d946cf89c2c475af3db63ab3d Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Tue, 2 Dec 2025 16:10:59 +0000 Subject: [PATCH 12/13] Don't pipe makefile tests to /dev/null so we can see what happened --- containers/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/containers/Makefile 
b/containers/Makefile index ae818d243..887cf35a6 100644 --- a/containers/Makefile +++ b/containers/Makefile @@ -65,8 +65,8 @@ push-all: push-infernal push-samtools push-rust-utils # Test tool containers locally test: infernal samtools rust-utils @echo "Testing Infernal container..." - docker run --rm $(INFERNAL_IMAGE) cmscan -h > /dev/null - docker run --rm $(INFERNAL_IMAGE) cmbuild -h > /dev/null + docker run --rm $(INFERNAL_IMAGE) cmscan -h + docker run --rm $(INFERNAL_IMAGE) cmbuild -h @echo "✓ Infernal tests passed" @echo "" @echo "Testing Samtools container..." @@ -75,9 +75,9 @@ test: infernal samtools rust-utils @echo "✓ Samtools tests passed" @echo "" @echo "Testing Rust utilities container..." - docker run --rm $(RUST_UTILS_IMAGE) json2fasta --help > /dev/null - docker run --rm $(RUST_UTILS_IMAGE) precompute --help > /dev/null - docker run --rm $(RUST_UTILS_IMAGE) search-export --help > /dev/null + docker run --rm $(RUST_UTILS_IMAGE) json2fasta --help + docker run --rm $(RUST_UTILS_IMAGE) precompute --help + docker run --rm $(RUST_UTILS_IMAGE) search-export --help @echo "✓ Rust utilities tests passed" # Clean up local tool container images From cc5a1bbdb855996dbda2494a93203e0bb7831d31 Mon Sep 17 00:00:00 2001 From: Andrew Green Date: Tue, 2 Dec 2025 16:11:17 +0000 Subject: [PATCH 13/13] Un-ignore some important bits of context for building in docker --- .dockerignore | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.dockerignore b/.dockerignore index d9ab9b024..6d52f4123 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,10 +4,6 @@ *.nf workflows/ tests/ -utils/ -Cargo.toml -Cargo.lock -Makefile containers/ Dockerfile.old .dockerignore