From 26091c7a2e6c50f1c07f01b553bca5816900cb24 Mon Sep 17 00:00:00 2001
From: Antoine Phan <antoine.phan@mail.mcgill.ca>
Date: Tue, 13 May 2025 13:03:23 -0400
Subject: [PATCH 1/2] Rebuilding Dockerfile for GPU-Celery, checkpoint with
 tensorflow image

---
 gpu-celery/Dockerfile | 204 +++++++++++-------------------------------
 1 file changed, 50 insertions(+), 154 deletions(-)

diff --git a/gpu-celery/Dockerfile b/gpu-celery/Dockerfile
index fd44c14f2..854d88806 100644
--- a/gpu-celery/Dockerfile
+++ b/gpu-celery/Dockerfile
@@ -1,149 +1,47 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-#
-# THIS IS A GENERATED DOCKERFILE.
-#
-# This file was assembled from multiple pieces, whose use is documented
-# throughout. Please refer to the TensorFlow dockerfiles documentation
-# for more information.
-
-ARG UBUNTU_VERSION=18.04
-ARG CUDA=11.2
-FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.2-base-ubuntu${UBUNTU_VERSION} as base
-# ARCH and CUDA are specified again because the FROM directive resets ARGs
-# (but their default value is retained if set previously)
-ARG ARCH
-ARG CUDA
-ARG CUDNN=8.1.0.77-1
-ARG CUDNN_MAJOR_VERSION=8
-ARG LIB_DIR_PREFIX=x86_64
-ARG LIBNVINFER=7.2.2-1
-ARG LIBNVINFER_MAJOR_VERSION=7
-
-# The following two arguments are rodan-specific
-ARG BRANCHES
-ARG VERSION
-
-# Needed for string substitution
+# ARG UBUNTU_VERSION=24.04
+# ARG CUDA=12.9.0
+# ARG IMAGE_VER=cudnn-runtime-ubuntu$UBUNTU_VERSION
+
+# ARG BASEIMAGE="nvidia/cuda:${CUDA}-${IMAGE_VER}"
+ARG BASEIMAGE="tensorflow/tensorflow:latest-gpu"
+
+FROM ${BASEIMAGE}
+
 SHELL ["/bin/bash", "-c"]
-# Pick up some TF dependencies
-#RUN apt-get update 
-
-#RUN rm  -rf /etc/apt/sources.list.d/cuda.list
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-        build-essential \
-        cuda-command-line-tools-${CUDA/./-} \
-        libcublas-${CUDA/./-} \
-        cuda-nvrtc-${CUDA/./-} \
-        libcufft-${CUDA/./-} \
-        libcurand-${CUDA/./-} \
-        libcusolver-${CUDA/./-} \
-        libcusparse-${CUDA/./-} \
-        curl \
-        libcudnn8=${CUDNN}+cuda${CUDA} \
-        libfreetype6-dev \
-        libhdf5-serial-dev \
-        libzmq3-dev \
-        pkg-config \
-        software-properties-common \
-        unzip \
-		wget
-# added wget		
-
-# Install TensorRT if not building for PowerPC
-# NOTE: libnvinfer uses cuda11.1 versions
-# RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
-#         apt-get install -y --no-install-recommends libnvinfer${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \
-#         libnvinfer-plugin${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \
-#         && apt-get clean \
-#         && rm -rf /var/lib/apt/lists/*; }
-
-# For CUDA profiling, TensorFlow requires CUPTI.
-ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH
-
-# Link the libcuda stub to the location where tensorflow is searching for it and reconfigure
-# dynamic linker run-time bindings
-RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \
-    && echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/z-cuda-stubs.conf \
-    && ldconfig
-
-# See http://bugs.python.org/issue19846
-ENV LANG C.UTF-8
-
-# This section differs from the default tensorflow2.5.1 Dockerfile, because we specifically add python 3.7;
-ARG PYTHON=python3.7
-ARG TF_PACKAGE=tensorflow
-ARG TF_PACKAGE_VERSION=2.5.1
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    python3.7 \
-    python3-pip \
-    && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* \
-  && wget https://bootstrap.pypa.io/pip/3.7/get-pip.py \
-  && ${PYTHON} get-pip.py \
-  && ln -sf /usr/bin/${PYTHON} /usr/local/bin/python3 \
-  && ln -sf /usr/local/bin/pip /usr/local/bin/pip3 \
-  && pip3 --no-cache-dir install --upgrade pip setuptools==57.0.0 \
-  # Some TF tools expect a "python" binary
-  && ln -s $(which python3) /usr/local/bin/python \
-  && python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
-
-# RUN ln -s $(which python3) /usr/local/bin/python
-
-# Options:
-#   tensorflow
-#   tensorflow-gpu
-#   tf-nightly
-#   tf-nightly-gpu
-# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
-# Installs the latest version by default.
-
-# COPY bashrc /etc/bash.bashrc
-# RUN chmod a+rwx /etc/bash.bashrc
-
-# This ends the material obtained from TensorFlow's dockerfile. the remainder is rodan-docker-specific setup.
-
-# FROM base
-RUN set -e \
-  && apt-get update \
-  && DEBIAN_FRONTEND="noninteractive" apt-get install -yqq \
-    git \
-    # Python lxml dependencies
-    python3.7-dev \
-    python3-opencv \
-    libxml2-dev \
-    libxslt1-dev \
-    zlib1g-dev \
-    lib32ncurses5-dev \
-    # Psycopg2 dependencies
-    libpq-dev \
-    # For resource identification
-    libmagic-dev \
-    unzip \
-    # Remove when done
-    vim
-
-RUN rm -rf /var/lib/apt/lists/*
-
-# Bandaid fix for the cannot import name '_registerMatType' from 'cv2.cv2' issue
-#RUN pip uninstall opencv-python-headless && pip install opencv-python-headless==4.1.2.30
+
+RUN apt-get update
+# RUN apt-get install -y --no-install-recommends \
+#    wget \
+#    ca-certificates \
+#    git \
+#    build-essential \
+#    dh-make \
+#    fakeroot \
+#    devscripts \
+#    lsb-release
+
+# ENV LANG=C.UTF-8
+
+# Install python
+#RUN apt-get install -y --no-install-recommends \
+#    python3 python3-pip python3-venv
+
+# WORKDIR missing
+# ENV VIRTUAL_ENV=.venv
+# RUN python3 -m venv .venv
+# ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# RUN source .venv/bin/activate
+# RUN pip3 --no-cache-dir install --upgrade pip setuptools
+
+# use .venv/bin/python3
+# we want tensorflow 2.5.1?
+# Other possible options: tensorflow, tensorflow-gpu, tf-nightly?, tf-nightly-gpu?
+# RUN pip3 install tensorflow
+
 
 # Install GPU Rodan Jobs
+
 COPY ./scripts/install_gpu_rodan_jobs /opt/
 
 # Install Rodan
@@ -158,21 +56,18 @@ COPY ./rodan-main/code /code/Rodan
 
 # necessary for scikit-image > 0.17, or else it will try to make a cache directory
 # in a place where the www-data user does not have permissions to do so
-ENV SKIMAGE_DATADIR "/tmp/.skimage_cache"
-
+ENV SKIMAGE_DATADIR="/tmp/.skimage_cache"
 
-RUN set -x \
-  # Create Folders
-  && mkdir -p /code/jobs \
+RUN set -x
+RUN mkdir -p /code/jobs
   # Install GPU Jobs
-  && chmod +x /opt/install_gpu_rodan_jobs \
-  && /opt/install_gpu_rodan_jobs \
-  # Install Rodan
-  && sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt \
+RUN chmod +x /opt/install_gpu_rodan_jobs
+RUN /opt/install_gpu_rodan_jobs ## issue here!!!
+RUN sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt
   # && sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt \
   # Add Entrypoints
-  && sed -i 's/\r//' /opt/entrypoint \
-  && chmod +x /opt/entrypoint \
+RUN sed -i 's/\r//' /opt/entrypoint \
+RUN chmod +x /opt/entrypoint \
   # Add Celery script
   && chmod +x /run/start-celery \
   # Change the concurency for gpu jobs because Calvo is very expensive
@@ -187,3 +82,4 @@ RUN pip3 uninstall -y opencv-python opencv-python-headless
 RUN pip3 install opencv-python-headless==4.1.2.30
 
 ENTRYPOINT ["/opt/entrypoint"]
+

From 93b880038890b900bc252255bd157090dd76c40a Mon Sep 17 00:00:00 2001
From: Antoine Phan <antoine.phan@mail.mcgill.ca>
Date: Thu, 15 May 2025 12:26:56 -0400
Subject: [PATCH 2/2] Update Dockerfile rebuild progress

---
 gpu-celery/Dockerfile | 105 ++++++++++++++++++++++++------------------
 1 file changed, 59 insertions(+), 46 deletions(-)

diff --git a/gpu-celery/Dockerfile b/gpu-celery/Dockerfile
index 854d88806..c522012ff 100644
--- a/gpu-celery/Dockerfile
+++ b/gpu-celery/Dockerfile
@@ -1,85 +1,98 @@
-# ARG UBUNTU_VERSION=24.04
-# ARG CUDA=12.9.0
-# ARG IMAGE_VER=cudnn-runtime-ubuntu$UBUNTU_VERSION
+# Based on the tensorflow version used in the old Dockerfile
+# - Dockerfile from PR#751: https://github.com/DDMAL/Rodan/blob/d28e06a0a6c2440fa76b47f46cf13d269de1952f/gpu-celery/Dockerfile
+# - Old version of the Dockerfile
 
-# ARG BASEIMAGE="nvidia/cuda:${CUDA}-${IMAGE_VER}"
-ARG BASEIMAGE="tensorflow/tensorflow:latest-gpu"
 
-FROM ${BASEIMAGE}
+# ---- Builder image ----
+FROM alpine:3 AS builder
+ARG BRANCH
+ENV BRANCH="${BRANCH:-develop}"
+# --no-cache fetches the package index on the fly and leaves no apk cache in the layer (replaces a separate `apk update`)
+RUN apk add --no-cache git
+# Clone Rodan with its submodules at ${BRANCH}; the ENV above falls back to "develop" when BRANCH is unset or empty
+WORKDIR /
+RUN git clone --recurse-submodules -b "${BRANCH}" https://github.com/DDMAL/Rodan
 
-SHELL ["/bin/bash", "-c"]
 
-RUN apt-get update
-# RUN apt-get install -y --no-install-recommends \
-#    wget \
-#    ca-certificates \
-#    git \
-#    build-essential \
-#    dh-make \
-#    fakeroot \
-#    devscripts \
-#    lsb-release
+# ---- Tensorflow image ----
+# FROM nvidia/cuda:12.9.0-runtime-ubuntu24.04
+FROM tensorflow/tensorflow:2.19.0-gpu
+SHELL ["/bin/bash", "-c"]
 
-# ENV LANG=C.UTF-8
+# Working from the root folder
+WORKDIR /
 
-# Install python
-#RUN apt-get install -y --no-install-recommends \
-#    python3 python3-pip python3-venv
+# Update packages
+RUN apt-get update 
 
-# WORKDIR missing
-# ENV VIRTUAL_ENV=.venv
-# RUN python3 -m venv .venv
-# ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+# Install 
+# RUN apt-get install python3 python3-pip python3-venv git -y
 
-# RUN source .venv/bin/activate
-# RUN pip3 --no-cache-dir install --upgrade pip setuptools
+# Refresh the index, install, and purge the package lists in ONE layer: no stale cached index, no lists left in the image.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yqq \
+    git python3-lxml \
+    # Psycopg2 dependencies
+    libpq-dev \
+    # OpenCV dependencies
+    libsm6 libxext6 libxrender-dev libglib2.0-data python3-opencv \
+    # For resource identification
+    libmagic-dev unzip \
+    && rm -rf /var/lib/apt/lists/*
 
-# use .venv/bin/python3
-# we want tensorflow 2.5.1?
-# Other possible options: tensorflow, tensorflow-gpu, tf-nightly?, tf-nightly-gpu?
-# RUN pip3 install tensorflow
+RUN rm -rf /var/lib/apt/lists/*
 
+# Bandaid fix for the cannot import name '_registerMatType' from 'cv2.cv2' issue
+# RUN pip uninstall opencv-python-headless && pip install opencv-python-headless==4.1.2.30
 
-# Install GPU Rodan Jobs
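+# Python virtual environment. NOTE(review): created without --system-site-packages, so the apt-installed python3-* packages (and presumably the base image's TensorFlow) may not be importable from it - confirm.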
+ENV VIRTUAL_ENV=/venv
+RUN python3 -m venv $VIRTUAL_ENV
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
+# --- Install GPU Rodan Jobs ---
+# Copy files from scripts to /opt/ and /run/
 COPY ./scripts/install_gpu_rodan_jobs /opt/
-
-# Install Rodan
-# Runs on both Rodan service, and Rodan-Celery
 COPY ./scripts/entrypoint /opt/
 COPY ./scripts/start-celery /run/
 COPY ./scripts/wait-for-app /run/
+COPY ./rodan-main/code /code/Rodan
 
+# Runs on both Rodan service, and Rodan-Celery
 # Copying rodan core from build context into container
 # Rodan folder MUST be uppercase, otherwise many unittests fail.
-COPY ./rodan-main/code /code/Rodan
 
 # necessary for scikit-image > 0.17, or else it will try to make a cache directory
 # in a place where the www-data user does not have permissions to do so
 ENV SKIMAGE_DATADIR="/tmp/.skimage_cache"
 
+
+# RUN apt-get update
+# RUN apt-get install git -y
+# RUN apt-get upgrade -y
+# RUN pip install --upgrade pip
+
 RUN set -x
 RUN mkdir -p /code/jobs
   # Install GPU Jobs
 RUN chmod +x /opt/install_gpu_rodan_jobs
 RUN /opt/install_gpu_rodan_jobs ## issue here!!!
-RUN sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt
+# RUN sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt
   # && sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt \
   # Add Entrypoints
-RUN sed -i 's/\r//' /opt/entrypoint \
-RUN chmod +x /opt/entrypoint \
+#RUN sed -i 's/\r//' /opt/entrypoint \
+#RUN chmod +x /opt/entrypoint \
   # Add Celery script
-  && chmod +x /run/start-celery \
+#RUN chmod +x /run/start-celery \
   # Change the concurency for gpu jobs because Calvo is very expensive
-  && sed -i "s/=10/=1/g" /run/start-celery \
+#RUN sed -i "s/=10/=1/g" /run/start-celery \
   # Script to wait for postgres and redis to be running before attempting to connect to them.
-  && chmod +x /run/wait-for-app
+#RUN chmod +x /run/wait-for-app
 
 # Install Rodan.
-RUN pip3 install -r /code/Rodan/requirements.txt
+# RUN pip install -r /code/Rodan/requirements.txt
 
-RUN pip3 uninstall -y opencv-python opencv-python-headless
-RUN pip3 install opencv-python-headless==4.1.2.30
+# RUN pip uninstall -y opencv-python opencv-python-headless
+# RUN pip install opencv-python-headless==4.1.2.30
 
-ENTRYPOINT ["/opt/entrypoint"]
+# ENTRYPOINT ["/opt/entrypoint"]