diff --git a/Makefile b/Makefile
index b66b7b8..5ce465c 100644
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,7 @@ CUDA_118_PREFIX := $(REGISTRY_REPO):cuda-11.8-
 CUDA_121_PREFIX := $(REGISTRY_REPO):$(UBUNTU_VERSION)-cuda-12.1-
 CUDA_124_PREFIX := $(REGISTRY_REPO):$(UBUNTU_VERSION)-cuda-12.4-
 CUDA_128_PREFIX := $(REGISTRY_REPO):$(UBUNTU_VERSION)-cuda-12.8-
+CUDA_129_PREFIX := $(REGISTRY_REPO):$(UBUNTU_VERSION)-cuda-12.9-
 ROCM_56_PREFIX := $(REGISTRY_REPO):rocm-5.6-
 
 CPU_SUFFIX := -cpu
@@ -95,6 +96,7 @@ export GPU_CUDA_118_BASE_NAME := $(CUDA_118_PREFIX)base$(GPU_SUFFIX)
 export GPU_CUDA_121_BASE_NAME := $(CUDA_121_PREFIX)base$(GPU_SUFFIX)
 export GPU_CUDA_124_BASE_NAME := $(CUDA_124_PREFIX)base$(GPU_SUFFIX)
 export GPU_CUDA_128_BASE_NAME := $(CUDA_128_PREFIX)base$(GPU_SUFFIX)
+export GPU_CUDA_129_BASE_NAME := $(CUDA_129_PREFIX)base$(GPU_SUFFIX)
 
 # Timeout used by packer for AWS operations. Default is 120 (30 minutes) for
 # waiting for AMI availablity. Bump to 360 attempts = 90 minutes.
@@ -272,6 +274,18 @@ build-gpu-cuda-128-base:
 		-o type=image,push=false \
 		.
 
+.PHONY: build-gpu-cuda-129-base
+build-gpu-cuda-129-base:
+	docker build -f Dockerfile-base-gpu \
+		--build-arg BASE_IMAGE="nvidia/cuda:12.9.1-cudnn-devel-$(UBUNTU_VERSION)" \
+		--build-arg PYTHON_VERSION="$(PYTHON_VERSION_311)" \
+		--build-arg UBUNTU_VERSION="$(UBUNTU_VERSION)" \
+		--build-arg "$(MPI_BUILD_ARG)" \
+		-t $(DOCKERHUB_REGISTRY)/$(GPU_CUDA_129_BASE_NAME)-$(SHORT_GIT_HASH) \
+		-t $(DOCKERHUB_REGISTRY)/$(GPU_CUDA_129_BASE_NAME)-$(VERSION) \
+		-o type=image,push=false \
+		.
+
 export CPU_TF1_ENVIRONMENT_NAME := $(CPU_PREFIX_37)pytorch-1.7-tf-1.15$(CPU_SUFFIX)
 export GPU_TF1_ENVIRONMENT_NAME := $(CUDA_102_PREFIX)pytorch-1.7-tf-1.15$(GPU_SUFFIX)
 
@@ -355,11 +369,13 @@ export GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME_201 := $(CUDA_118_PREFIX)$(PY_39_
 export GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME_210 := $(CUDA_121_PREFIX)$(PY_39_TAG)pytorch-2.1.0-gpt-neox-deepspeed$(GPU_SUFFIX)
 export GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME_240 := $(CUDA_124_PREFIX)$(PY_39_TAG)pytorch-2.4.0-gpt-neox-deepspeed$(GPU_SUFFIX)
 export GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME_270 := $(CUDA_128_PREFIX)$(PY_311_TAG)pytorch-2.7.0-gpt-neox-deepspeed$(GPU_SUFFIX)
+export GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME_280 := $(CUDA_129_PREFIX)$(PY_311_TAG)pytorch-2.8.0-gpt-neox-deepspeed$(GPU_SUFFIX)
 export TORCH_PIP_DEEPSPEED_GPU := torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1+cu117 -f https://download.pytorch.org/whl/cu117/torch_stable.html
 export TORCH_PIP_DEEPSPEED_GPU_201 := torch==2.0.1+cu118 torchvision==0.15.2+cu118 torchaudio==2.0.2+cu118 -f https://download.pytorch.org/whl/cu118/torch_stable.html
 export TORCH_PIP_DEEPSPEED_GPU_210 := torch==2.1.0+cu121 torchvision==0.16.0+cu121 torchaudio==2.1.0+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html
 export TORCH_PIP_DEEPSPEED_GPU_240 := torch==2.4.0+cu124 torchvision==0.19.0+cu124 torchaudio==2.4.0+cu124 --index-url https://download.pytorch.org/whl/cu124
 export TORCH_PIP_DEEPSPEED_GPU_270 := torch==2.7.0+cu128 torchvision==0.22.0+cu128 torchaudio==2.7.0+cu128 --index-url https://download.pytorch.org/whl/cu128
+export TORCH_PIP_DEEPSPEED_GPU_280 := torch==2.8.0+cu129 torchvision==0.23.0+cu129 torchaudio==2.8.0+cu129 --index-url https://download.pytorch.org/whl/cu129
 export TORCH_TB_PROFILER_PIP := torch-tb-profiler==0.4.1
 
 # This builds deepspeed environment off of upstream microsoft/DeepSpeed.
@@ -398,6 +414,9 @@ augment-torch-240: build-gpt-neox-deepspeed-gpu-torch-240
 .PHONY: augment-torch-270
 augment-torch-270: build-gpt-neox-deepspeed-gpu-torch-270
 
+.PHONY: augment-torch-280
+augment-torch-280: build-gpt-neox-deepspeed-gpu-torch-280
+
 # This builds deepspeed environment off of a patched version of EleutherAI's fork of DeepSpeed
 # that we need for gpt-neox support.
 .PHONY: build-gpt-neox-deepspeed-gpu
@@ -484,6 +503,20 @@ build-gpt-neox-deepspeed-gpu-torch-270: build-gpu-cuda-128-base
 		-o type=image,push=false \
 		.
 
+.PHONY: build-gpt-neox-deepspeed-gpu-torch-280
+build-gpt-neox-deepspeed-gpu-torch-280: build-gpu-cuda-129-base
+	docker build -f Dockerfile-default-gpu \
+		--build-arg BASE_IMAGE="$(DOCKERHUB_REGISTRY)/$(GPU_CUDA_129_BASE_NAME)-$(SHORT_GIT_HASH)" \
+		--build-arg TORCH_PIP="$(TORCH_PIP_DEEPSPEED_GPU_280)" \
+		--build-arg TORCH_TB_PROFILER_PIP="$(TORCH_TB_PROFILER_PIP)" \
+		--build-arg TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0" \
+		--build-arg DET_BUILD_NCCL="" \
+		--build-arg DEEPSPEED_PIP="git+https://github.com/augmentcode/DeeperSpeed.git@d08ec4e806ace0721026dd83067ca43ddc697e15" \
+		-t $(DOCKERHUB_REGISTRY)/$(GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME_280)-$(SHORT_GIT_HASH) \
+		-t $(DOCKERHUB_REGISTRY)/$(GPU_GPT_NEOX_DEEPSPEED_ENVIRONMENT_NAME_280)-$(VERSION) \
+		-o type=image,push=false \
+		.
+
 ifeq ($(NGC_PUBLISH),)
 define CPU_TF28_TAGS
 	-t $(DOCKERHUB_REGISTRY)/$(CPU_TF28_ENVIRONMENT_NAME)-$(SHORT_GIT_HASH) \