From fb58fba9d79b0284eb0e666abe3c2fd7a0353710 Mon Sep 17 00:00:00 2001 From: Joshua Schmidt Date: Wed, 6 Aug 2025 15:51:12 +1000 Subject: [PATCH 1/5] initial docker file and res for hail on gcp-vertex-workbench --- images/hail-gcp-vertex-workbench/Dockerfile | 52 ++++ .../requirements.txt | 226 ++++++++++++++++++ 2 files changed, 278 insertions(+) create mode 100644 images/hail-gcp-vertex-workbench/Dockerfile create mode 100644 images/hail-gcp-vertex-workbench/requirements.txt diff --git a/images/hail-gcp-vertex-workbench/Dockerfile b/images/hail-gcp-vertex-workbench/Dockerfile new file mode 100644 index 00000000..85ebc16d --- /dev/null +++ b/images/hail-gcp-vertex-workbench/Dockerfile @@ -0,0 +1,52 @@ +# description of slim base container https://cloud.google.com/vertex-ai/docs/workbench/instances/create-custom-container#slim-base-container +# NB: uses micromamba, NOT conda +FROM us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-container-slim:latest AS base + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + htop \ + unzip \ + bzip2 \ + zip \ + tar \ + rsync \ + xsltproc pandoc \ + openjdk-11-jdk-headless \ + liblapack3 \ + libopenblas-dev \ + libpq-dev \ + liblz4-dev \ + g++ \ + gcc \ + cmake \ + apt-transport-https \ + ca-certificates \ + gnupg \ + curl && \ + rm -rf /var/lib/apt/lists/* + +FROM base AS gcp_build +# gcloud install +RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ + apt-get update && \ + apt-get install -y \ + google-cloud-cli + +FROM gcp_build AS hail_build +ARG VERSION=${VERSION:-0.2.135} +ENV KERNEL_DISPLAY_NAME="hail-${VERSION}" +ENV ENVIRONMENT_NAME="hail-${VERSION}" +ENV MICROMAMBA_ENV_HOME="/opt/micromamba/envs" +COPY requirements.txt requirements.txt + +# create env and install hail + dependences +RUN micromamba create --prefix "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" python=3.10 ipykernel && \ + micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" pip install --no-cache-dir --no-deps -r requirements.txt && \ + micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" python -m ipykernel install --name "${ENVIRONMENT_NAME}" --display-name "${KERNEL_DISPLAY_NAME}" && \ + curl -o "$(micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" find_spark_home.py)/jars/gcs-connector-hadoop2-2.2.28.jar" https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.2.28.jar + + diff --git a/images/hail-gcp-vertex-workbench/requirements.txt b/images/hail-gcp-vertex-workbench/requirements.txt new file mode 100644 index 00000000..f2c29866 --- /dev/null +++ b/images/hail-gcp-vertex-workbench/requirements.txt @@ -0,0 +1,226 @@ +aiodns==2.0.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annoy==1.17.3 +anyio==4.10.0 +argon2-cffi==25.1.0 +argon2-cffi-bindings==25.1.0 +asttokens==3.0.0 +async-timeout==5.0.1 +attrs==25.3.0 +avro==1.11.3 +azure-common==1.1.28 +azure-core==1.35.0 +azure-identity==1.23.1 +azure-mgmt-core==1.6.0 +azure-mgmt-storage==20.1.0 +azure-storage-blob==12.26.0 +azure-storage-file-datalake==12.21.0 +backoff==2.2.1 +beautifulsoup4==4.13.4 +bigquery_jupyter_plugin==0.0.1 +biocommons.seqrepo==0.6.11 +bioutils==0.6.1 +bleach==6.2.0 +bokeh==3.4.3 +boto3==1.40.3 +botocore==1.40.3 +cachetools==5.5.2 +canonicaljson==2.0.0 +certifi==2025.8.3 +cffi==1.17.1 +cfgv==3.4.0 +charset-normalizer==3.4.2 +click==8.2.1 +cloudpathlib==0.21.1 +coloredlogs==15.0.1 +comm==0.2.3 +commonmark==0.9.1 +configparser==7.2.0 +contourpy==1.3.2 +cpg-utils==5.4.1 +cryptography==45.0.6 +cycler==0.12.1 +dataproc_jupyter_plugin==0.1.74 +debugpy==1.8.15 +decorator==4.4.2 +defusedxml==0.7.1 +Deprecated==1.2.18 +dill==0.3.9 +distlib==0.4.0 +exceptiongroup==1.3.0 +executing==2.2.0 +fastjsonschema==2.21.1 +filelock==3.18.0 +flatbuffers==25.2.10 +fonttools==4.59.0 +frozendict==2.4.6 +frozenlist==1.7.0 +fsspec==2025.7.0 +ga4gh.vrs==0.8.4 +gcsfs==2025.7.0 +gnomad==0.8.2 +google-api-core==2.25.1 +google-auth==2.40.3 +google-auth-oauthlib==0.8.0 +google-cloud-artifact-registry==1.16.1 +google-cloud-core==2.4.3 +google-cloud-jupyter-config==0.0.10 +google-cloud-secret-manager==2.24.0 +google-cloud-storage==3.2.0 +google-crc32c==1.7.1 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +gql==3.5.3 +graphql-core==3.2.6 +grpc-google-iam-v1==0.14.2 +grpcio==1.74.0 +grpcio-status==1.74.0 +hail==0.2.135 +hdbscan==0.8.40 +hgvs==1.5.5 +humanfriendly==10.0 +humanize==4.12.3 +identify==2.6.12 +idna==3.10 +importlib_metadata==8.7.0 +importlib_resources==6.5.2 +inflection==0.5.1 +ipykernel==6.30.1 +ipython==8.37.0 +ipywidgets==8.1.7 +isodate==0.7.2 +janus==1.0.0 +jedi==0.19.2 +Jinja2==3.1.6 +jmespath==1.0.1 +joblib==1.5.1 +jproperties==2.1.2 +jsonpointer==3.0.0 +jsonschema==3.2.0 +jupyter_client==8.6.3 +jupyter_core==5.8.1 +jupyter-events==0.6.3 +jupyter_server==2.10.0 +jupyter_server_terminals==0.5.3 +jupyterlab_pygments==0.3.0 +jupyterlab_widgets==3.0.15 +kernels-mixer==0.0.15 +kiwisolver==1.4.8 +Markdown==3.8.2 +MarkupSafe==3.0.2 +matplotlib==3.10.5 +matplotlib-inline==0.1.7 +metamist==7.10.2 +mistune==3.1.3 +mpmath==1.3.0 +msal==1.33.0 +msal-extensions==1.3.1 +msrest==0.7.1 +multidict==6.6.3 +nbclient==0.10.2 +nbconvert==7.16.6 +nbformat==5.10.4 +nest_asyncio==1.6.0 +networkx==3.4.2 +nodeenv==1.9.1 +numpy==2.2.6 +oauthlib==3.3.1 +onnx==1.18.0 +onnxruntime==1.22.1 +orjson==3.11.1 +overrides==7.7.0 +packaging==25.0 +pandas==2.3.1 +pandocfilters==1.5.1 +parsimonious==0.10.0 +Parsley==1.3 +parso==0.8.4 +patsy==1.0.1 +pendulum==3.1.0 +pexpect==4.9.0 +pickleshare==0.7.5 +pillow==11.3.0 +pip==25.2 +platformdirs==4.3.8 +plotly==5.24.1 +polars==1.32.0 +pre-commit==3.8.0 +prometheus_client==0.22.1 +prompt_toolkit==3.0.51 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==6.31.1 +psutil==7.0.0 +psycopg2-binary==2.9.10 +ptyprocess==0.7.0 +pure_eval==0.2.3 +py4j==0.10.9.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycares==4.10.0 +pycparser==2.22 +pydantic==1.10.22 +Pygments==2.19.2 +PyJWT==2.10.1 +pyparsing==3.2.3 +pyrsistent==0.20.0 +pysam==0.23.3 +pyspark==3.5.6 +python-dateutil==2.9.0.post0 +python-json-logger==2.0.7 +python-jsonschema-objects==0.4.6 +pytz==2025.2 +PyYAML==6.0.2 +pyzmq==27.0.1 +regex==2025.7.34 +requests==2.32.4 +requests-oauthlib==2.0.0 +requests-toolbelt==1.0.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rich==12.6.0 +rsa==4.9.1 +s3transfer==0.13.1 +scikit-learn==1.7.1 +scipy==1.15.3 +Send2Trash==1.8.3 +setuptools==80.9.0 +shellingham==1.5.4 +six==1.17.0 +skl2onnx==1.19.1 +slackclient==2.5.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +sqlparse==0.5.3 +stack_data==0.6.3 +statsmodels==0.14.5 +sympy==1.14.0 +tabulate==0.9.0 +tenacity==9.1.2 +terminado==0.18.1 +threadpoolctl==3.6.0 +tinycss2==1.4.0 +toml==0.10.2 +tornado==6.5.1 +tqdm==4.67.1 +traitlets==5.14.3 +typer==0.16.0 +typing_extensions==4.14.1 +tzdata==2025.2 +urllib3==2.5.0 +uvloop==0.21.0 +virtualenv==20.33.1 +wcwidth==0.2.13 +webcolors==24.11.1 +webencodings==0.5.1 +websocket-client==1.8.0 +wheel==0.45.1 +widgetsnbextension==4.0.14 +wrapt==1.17.2 +xyzservices==2025.4.0 +yarl==1.20.1 +yoyo-migrations==9.0.0 +zipp==3.23.0 From 3ce3f09569619f3eb12af5d41e7f644d0e069355 Mon Sep 17 00:00:00 2001 From: Joshua Schmidt Date: Thu, 4 Sep 2025 13:49:41 +1000 Subject: [PATCH 2/5] typo --- images/hail-gcp-vertex-workbench/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/images/hail-gcp-vertex-workbench/Dockerfile b/images/hail-gcp-vertex-workbench/Dockerfile index 85ebc16d..43bab8ed 100644 --- a/images/hail-gcp-vertex-workbench/Dockerfile +++ b/images/hail-gcp-vertex-workbench/Dockerfile @@ -43,7 +43,7 @@ ENV ENVIRONMENT_NAME="hail-${VERSION}" ENV MICROMAMBA_ENV_HOME="/opt/micromamba/envs" COPY requirements.txt requirements.txt -# create env and install hail + dependences +# create env and install hail + dependencies RUN micromamba create --prefix "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" python=3.10 ipykernel && \ micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" pip install --no-cache-dir --no-deps -r requirements.txt && \ micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" python -m ipykernel install --name "${ENVIRONMENT_NAME}" --display-name "${KERNEL_DISPLAY_NAME}" && \ From 101e3671e6f78b3813cd10c7512387173ab1d55c Mon Sep 17 00:00:00 2001 From: Joshua Schmidt Date: Thu, 4 Sep 2025 14:01:53 +1000 Subject: [PATCH 3/5] linting --- images/hail-gcp-vertex-workbench/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/images/hail-gcp-vertex-workbench/Dockerfile b/images/hail-gcp-vertex-workbench/Dockerfile index 43bab8ed..546f535f 100644 --- a/images/hail-gcp-vertex-workbench/Dockerfile +++ b/images/hail-gcp-vertex-workbench/Dockerfile @@ -49,4 +49,3 @@ RUN micromamba create --prefix "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" py micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" python -m ipykernel install --name "${ENVIRONMENT_NAME}" --display-name "${KERNEL_DISPLAY_NAME}" && \ curl -o "$(micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" find_spark_home.py)/jars/gcs-connector-hadoop2-2.2.28.jar" https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.2.28.jar - From 98313de9cf5de55df726ee366e455a2e79f34536 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Thu, 16 Oct 2025 18:05:54 +1300 Subject: [PATCH 4/5] Use canonical base image path and silence whitespace linter --- images/hail-gcp-vertex-workbench/Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/images/hail-gcp-vertex-workbench/Dockerfile b/images/hail-gcp-vertex-workbench/Dockerfile index 546f535f..57810a05 100644 --- a/images/hail-gcp-vertex-workbench/Dockerfile +++ b/images/hail-gcp-vertex-workbench/Dockerfile @@ -1,6 +1,6 @@ # description of slim base container https://cloud.google.com/vertex-ai/docs/workbench/instances/create-custom-container#slim-base-container # NB: uses micromamba, NOT conda -FROM us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-container-slim:latest AS base +FROM gcr.io/deeplearning-platform-release/workbench-container-slim:latest AS base ENV DEBIAN_FRONTEND=noninteractive @@ -43,9 +43,8 @@ ENV ENVIRONMENT_NAME="hail-${VERSION}" ENV MICROMAMBA_ENV_HOME="/opt/micromamba/envs" COPY requirements.txt requirements.txt -# create env and install hail + dependencies +# create env and install hail + dependencies RUN micromamba create --prefix "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" python=3.10 ipykernel && \ micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" pip install --no-cache-dir --no-deps -r requirements.txt && \ micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" python -m ipykernel install --name "${ENVIRONMENT_NAME}" --display-name "${KERNEL_DISPLAY_NAME}" && \ curl -o "$(micromamba run -p "${MICROMAMBA_ENV_HOME}"/"${ENVIRONMENT_NAME}" find_spark_home.py)/jars/gcs-connector-hadoop2-2.2.28.jar" https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.2.28.jar - From 00998b7e8384eafc0aba17c0b3561ba63ea5a182 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Tue, 25 Nov 2025 15:40:20 +1300 Subject: [PATCH 5/5] Use slack_sdk instead of long-deprecated slackclient --- images/hail-gcp-vertex-workbench/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/images/hail-gcp-vertex-workbench/requirements.txt b/images/hail-gcp-vertex-workbench/requirements.txt index f2c29866..c65527d8 100644 --- a/images/hail-gcp-vertex-workbench/requirements.txt +++ b/images/hail-gcp-vertex-workbench/requirements.txt @@ -190,7 +190,7 @@ setuptools==80.9.0 shellingham==1.5.4 six==1.17.0 skl2onnx==1.19.1 -slackclient==2.5.0 +slack_sdk==3.39.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7