From a2e571d986eb7a3e9bf610ac24846b4202d28e50 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Tue, 2 Jul 2024 02:01:06 +0000 Subject: [PATCH 1/7] upgrade tf --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3f722b6..207b86c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,7 +47,7 @@ RUN R -e "reticulate::conda_create(envname = \"${ENV_NAME}\", conda = \"auto\", ENV RETICULATE_PYTHON="${MINICONDA_PATH}/envs/${ENV_NAME}/bin/python" # Tensorflow and Keras -ARG TENSORFLOW_VERSION=2.11.0 +ARG TENSORFLOW_VERSION=2.12.0 RUN R -e "keras::install_keras(tensorflow = \"${TENSORFLOW_VERSION}\", extra_packages = c(\"pandas\", \"numpy\", \"pycryptodome\"), method=\"conda\", envname=\"${ENV_NAME}\")" # Install kaggle libraries. From 49e841fe9eee3656406aa535dfd1d38e848628eb Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Tue, 2 Jul 2024 04:18:47 +0000 Subject: [PATCH 2/7] gpu + racecar --- Jenkinsfile | 139 +++++++++++++++++++++++++------------------------ gpu.Dockerfile | 2 +- 2 files changed, 71 insertions(+), 70 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a9a8d29..35a64af 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -22,80 +22,81 @@ pipeline { } stages { - stage('Docker CPU Build') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - ./build | ts - date - ./push ${PRETEST_TAG} - ''' - } - } - - stage('Test CPU Image') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - date - ./test --image gcr.io/kaggle-images/rstats:${PRETEST_TAG} - ''' - } - } - - stage('Docker GPU Build') { - agent { label 'ephemeral-linux-gpu' } - steps { - sh '''#!/bin/bash - set -exo pipefail - # Remove images (dangling or not) created more than 120h (5 days ago) to prevent disk from filling up. - docker image prune --all --force --filter "until=120h" --filter "label=kaggle-lang=r" - # Remove any dangling images (no tags). - # All builds for the same branch uses the same tag. This means a subsequent build for the same branch - # will untag the previously built image which is safe to do. Builds for a single branch are performed - # serially. - docker image prune -f - ./build --gpu --base-image-tag ${PRETEST_TAG} | ts - date - ./push --gpu ${PRETEST_TAG} - ''' - } - } - - stage('Test GPU Image') { - agent { label 'ephemeral-linux-gpu' } - steps { - sh '''#!/bin/bash - set -exo pipefail - date - ./test --gpu --image gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} - ''' - } - } - - stage('Package Versions') { + stage('Build/Test/Diff') { parallel { - stage('CPU Diff') { - steps { - sh '''#!/bin/bash - set -exo pipefail + stage('CPU') { + stages { + stage('Docker CPU Build') { + steps { + sh '''#!/bin/bash + set -exo pipefail + ./build | ts + date + ./push ${PRETEST_TAG} + ''' + } + } + stage('Test CPU Image') { + steps { + sh '''#!/bin/bash + set -exo pipefail + date + ./test --image gcr.io/kaggle-images/rstats:${PRETEST_TAG} + ''' + } + } + stage('CPU Diff') { + steps { + sh '''#!/bin/bash + set -exo pipefail - docker pull gcr.io/kaggle-images/rstats:${PRETEST_TAG} - ./diff --target gcr.io/kaggle-images/rstats:${PRETEST_TAG} - ''' + docker pull gcr.io/kaggle-images/rstats:${PRETEST_TAG} + ./diff --target gcr.io/kaggle-images/rstats:${PRETEST_TAG} + ''' + } + } } } - stage('GPU Diff') { + + stage('GPU') { agent { label 'ephemeral-linux-gpu' } - steps { - sh '''#!/bin/bash - set -exo pipefail - - docker pull gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} - ./diff --gpu --target gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} - ''' + stages{ + stage('Docker GPU Build') { + steps { + sh '''#!/bin/bash + set -exo pipefail + # Remove images (dangling or not) created more than 120h (5 days ago) to prevent disk from filling up. + docker image prune --all --force --filter "until=120h" --filter "label=kaggle-lang=r" + # Remove any dangling images (no tags). + # All builds for the same branch uses the same tag. This means a subsequent build for the same branch + # will untag the previously built image which is safe to do. Builds for a single branch are performed + # serially. + docker image prune -f + ./build --gpu --base-image-tag ${PRETEST_TAG} | ts + date + ./push --gpu ${PRETEST_TAG} + ''' + } + } + stage('Test GPU Image') { + steps { + sh '''#!/bin/bash + set -exo pipefail + date + ./test --gpu --image gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} + ''' + } + } + stage('GPU Diff') { + steps { + sh '''#!/bin/bash + set -exo pipefail + + docker pull gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} + ./diff --gpu --target gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} + ''' + } + } } } } diff --git a/gpu.Dockerfile b/gpu.Dockerfile index ca9a034..82eff32 100644 --- a/gpu.Dockerfile +++ b/gpu.Dockerfile @@ -56,7 +56,7 @@ ENV CUDA_HOME=/usr/local/cuda ADD ldpaths $R_HOME/etc/ldpaths # Install tensorflow with GPU support -ARG TENSORFLOW_VERSION=2.11.0 +ARG TENSORFLOW_VERSION=2.12.0 RUN R -e "keras::install_keras(version = \"${TENSORFLOW_VERSION}-gpu\", method = \"conda\", conda = \"auto\", envname=\"r-reticulate\")" && \ rm -rf /tmp/tensorflow_gpu && \ /tmp/clean-layer.sh From 077db0f6986e8bd57e48d9454245ebd624ec2b04 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Tue, 2 Jul 2024 04:49:23 +0000 Subject: [PATCH 3/7] gpu + racecars --- gpu.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu.Dockerfile b/gpu.Dockerfile index 82eff32..ca9a034 100644 --- a/gpu.Dockerfile +++ b/gpu.Dockerfile @@ -56,7 +56,7 @@ ENV CUDA_HOME=/usr/local/cuda ADD ldpaths $R_HOME/etc/ldpaths # Install tensorflow with GPU support -ARG TENSORFLOW_VERSION=2.12.0 +ARG TENSORFLOW_VERSION=2.11.0 RUN R -e "keras::install_keras(version = \"${TENSORFLOW_VERSION}-gpu\", method = \"conda\", conda = \"auto\", envname=\"r-reticulate\")" && \ rm -rf /tmp/tensorflow_gpu && \ /tmp/clean-layer.sh From f9bb17587ba313df51decb74eecb1515857da445 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Tue, 2 Jul 2024 16:21:57 +0000 Subject: [PATCH 4/7] undo parr --- Jenkinsfile | 139 ++++++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 70 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 35a64af..a9a8d29 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -22,81 +22,80 @@ pipeline { } stages { - stage('Build/Test/Diff') { + stage('Docker CPU Build') { + steps { + sh '''#!/bin/bash + set -exo pipefail + + ./build | ts + date + ./push ${PRETEST_TAG} + ''' + } + } + + stage('Test CPU Image') { + steps { + sh '''#!/bin/bash + set -exo pipefail + + date + ./test --image gcr.io/kaggle-images/rstats:${PRETEST_TAG} + ''' + } + } + + stage('Docker GPU Build') { + agent { label 'ephemeral-linux-gpu' } + steps { + sh '''#!/bin/bash + set -exo pipefail + # Remove images (dangling or not) created more than 120h (5 days ago) to prevent disk from filling up. + docker image prune --all --force --filter "until=120h" --filter "label=kaggle-lang=r" + # Remove any dangling images (no tags). + # All builds for the same branch uses the same tag. This means a subsequent build for the same branch + # will untag the previously built image which is safe to do. Builds for a single branch are performed + # serially. + docker image prune -f + ./build --gpu --base-image-tag ${PRETEST_TAG} | ts + date + ./push --gpu ${PRETEST_TAG} + ''' + } + } + + stage('Test GPU Image') { + agent { label 'ephemeral-linux-gpu' } + steps { + sh '''#!/bin/bash + set -exo pipefail + date + ./test --gpu --image gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} + ''' + } + } + + stage('Package Versions') { parallel { - stage('CPU') { - stages { - stage('Docker CPU Build') { - steps { - sh '''#!/bin/bash - set -exo pipefail - ./build | ts - date - ./push ${PRETEST_TAG} - ''' - } - } - stage('Test CPU Image') { - steps { - sh '''#!/bin/bash - set -exo pipefail - date - ./test --image gcr.io/kaggle-images/rstats:${PRETEST_TAG} - ''' - } - } - stage('CPU Diff') { - steps { - sh '''#!/bin/bash - set -exo pipefail + stage('CPU Diff') { + steps { + sh '''#!/bin/bash + set -exo pipefail - docker pull gcr.io/kaggle-images/rstats:${PRETEST_TAG} - ./diff --target gcr.io/kaggle-images/rstats:${PRETEST_TAG} - ''' - } - } + docker pull gcr.io/kaggle-images/rstats:${PRETEST_TAG} + ./diff --target gcr.io/kaggle-images/rstats:${PRETEST_TAG} + ''' } } - - stage('GPU') { + stage('GPU Diff') { agent { label 'ephemeral-linux-gpu' } - stages{ - stage('Docker GPU Build') { - steps { - sh '''#!/bin/bash - set -exo pipefail - # Remove images (dangling or not) created more than 120h (5 days ago) to prevent disk from filling up. - docker image prune --all --force --filter "until=120h" --filter "label=kaggle-lang=r" - # Remove any dangling images (no tags). - # All builds for the same branch uses the same tag. This means a subsequent build for the same branch - # will untag the previously built image which is safe to do. Builds for a single branch are performed - # serially. - docker image prune -f - ./build --gpu --base-image-tag ${PRETEST_TAG} | ts - date - ./push --gpu ${PRETEST_TAG} - ''' - } - } - stage('Test GPU Image') { - steps { - sh '''#!/bin/bash - set -exo pipefail - date - ./test --gpu --image gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} - ''' - } - } - stage('GPU Diff') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - docker pull gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} - ./diff --gpu --target gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} - ''' - } - } + steps { + sh '''#!/bin/bash + set -exo pipefail + + docker pull gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} + ./diff --gpu --target gcr.io/kaggle-private-byod/rstats:${PRETEST_TAG} + ''' } } } From e5fd40f976036f1eac524a3daad7248dec4c8cc2 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Sat, 3 May 2025 22:17:43 +0000 Subject: [PATCH 5/7] fix tornado build --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 207b86c..5c97bfe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,7 @@ RUN apt-get update && \ apt-get install -y libzmq3-dev default-jdk && \ apt-get install -y python${PYTHON_VERSION}-dev python3-venv libcurl4-openssl-dev libssl-dev && \ pip install jupyter pycurl && \ + pip install --upgrade setuptools && \ # Install older tornado - https://github.com/jupyter/notebook/issues/4437 pip install "tornado<6" && \ pip install notebook && \ @@ -47,7 +48,7 @@ RUN R -e "reticulate::conda_create(envname = \"${ENV_NAME}\", conda = \"auto\", ENV RETICULATE_PYTHON="${MINICONDA_PATH}/envs/${ENV_NAME}/bin/python" # Tensorflow and Keras -ARG TENSORFLOW_VERSION=2.12.0 +ARG TENSORFLOW_VERSION=2.18.0 RUN R -e "keras::install_keras(tensorflow = \"${TENSORFLOW_VERSION}\", extra_packages = c(\"pandas\", \"numpy\", \"pycryptodome\"), method=\"conda\", envname=\"${ENV_NAME}\")" # Install kaggle libraries. From 0579e37c762453cf0ed76156cfcbb05595ea385b Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Mon, 5 May 2025 19:31:35 +0000 Subject: [PATCH 6/7] revert tf upgrade --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5c97bfe..d9811a2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,7 +48,7 @@ RUN R -e "reticulate::conda_create(envname = \"${ENV_NAME}\", conda = \"auto\", ENV RETICULATE_PYTHON="${MINICONDA_PATH}/envs/${ENV_NAME}/bin/python" # Tensorflow and Keras -ARG TENSORFLOW_VERSION=2.18.0 +ARG TENSORFLOW_VERSION=2.12.0 RUN R -e "keras::install_keras(tensorflow = \"${TENSORFLOW_VERSION}\", extra_packages = c(\"pandas\", \"numpy\", \"pycryptodome\"), method=\"conda\", envname=\"${ENV_NAME}\")" # Install kaggle libraries. From c652dc766bd6c62da2787173f2e588e43ccc0b85 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Mon, 5 May 2025 21:12:47 +0000 Subject: [PATCH 7/7] revert 2.11 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d9811a2..7ac8b3b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,7 +48,7 @@ RUN R -e "reticulate::conda_create(envname = \"${ENV_NAME}\", conda = \"auto\", ENV RETICULATE_PYTHON="${MINICONDA_PATH}/envs/${ENV_NAME}/bin/python" # Tensorflow and Keras -ARG TENSORFLOW_VERSION=2.12.0 +ARG TENSORFLOW_VERSION=2.11.0 RUN R -e "keras::install_keras(tensorflow = \"${TENSORFLOW_VERSION}\", extra_packages = c(\"pandas\", \"numpy\", \"pycryptodome\"), method=\"conda\", envname=\"${ENV_NAME}\")" # Install kaggle libraries.