diff --git a/Dockerfile b/Dockerfile
index 44a9f8651..9c890d28b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,8 +13,8 @@ ARG MAX_JOBS=8
 # Git refs for dependencies
 ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
 ARG PYTRITON_VERSION=0.5.10
-ARG NEMO_TAG=19668e5320a2e2af0199b6d5e0b841993be3a634 # On: main
-ARG MLM_TAG=25059d3bbf68be0751800f3644731df12a88f3f3 # On: main
+ARG NEMO_TAG=ko3n1g/build/move-to-req # On: main
+ARG MCORE_TAG=25059d3bbf68be0751800f3644731df12a88f3f3 # On: main
 ARG ALIGNER_COMMIT=main
 ARG TRTLLM_VERSION=v0.13.0
 ARG PROTOBUF_VERSION=4.24.4
@@ -34,8 +34,6 @@ git checkout -f $ALIGNER_COMMIT
 # case 1: ALIGNER_COMMIT is a local branch so we have to apply remote changes to it
 # case 2: ALIGNER_COMMIT is a commit, so git-pull is expected to fail
 git pull --rebase || true
-
-pip install --no-cache-dir --no-deps -e .
 EOF
 
 FROM ${BASE_IMAGE} as final
@@ -44,15 +42,15 @@ WORKDIR /opt
 # needed in case git complains that it can't detect a valid email, this email is fake but works
 RUN git config --global user.email "worker@nvidia.com"
 # install latest apex
-ARG APEX_TAG
-RUN pip uninstall -y apex && \
-    git clone https://github.com/NVIDIA/apex && \
-    cd apex && \
-    if [ ! -z $APEX_TAG ]; then \
-      git fetch origin $APEX_TAG && \
-      git checkout FETCH_HEAD; \
-    fi && \
-    pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./
+# ARG APEX_TAG
+# RUN pip uninstall -y apex && \
+#     git clone https://github.com/NVIDIA/apex && \
+#     cd apex && \
+#     if [ ! -z $APEX_TAG ]; then \
+#       git fetch origin $APEX_TAG && \
+#       git checkout FETCH_HEAD; \
+#     fi && \
+#     pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./
 
 # Git LFS
 RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
@@ -60,15 +58,15 @@ RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.d
     git lfs install && \
     apt-get clean
 
-# TRTLLM
-ARG TRTLLM_VERSION
-RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
-    cd TensorRT-LLM && \
-    git checkout ${TRTLLM_VERSION} && \
-    . docker/common/install_tensorrt.sh && \
-    python3 ./scripts/build_wheel.py --job_count $(nproc) --trt_root /usr/local/tensorrt --python_bindings --benchmarks && \
-    pip install -e .
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/
+# # TRTLLM
+# ARG TRTLLM_VERSION
+# RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
+#     cd TensorRT-LLM && \
+#     git checkout ${TRTLLM_VERSION} && \
+#     . docker/common/install_tensorrt.sh && \
+#     python3 ./scripts/build_wheel.py --job_count $(nproc) --trt_root /usr/local/tensorrt --python_bindings --benchmarks && \
+#     pip install -e .
+# ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/
 
 # install TransformerEngine
 ARG MAX_JOBS
@@ -77,18 +75,10 @@ RUN pip uninstall -y transformer-engine && \
     git clone https://github.com/NVIDIA/TransformerEngine.git && \
     cd TransformerEngine && \
     if [ ! -z $TE_TAG ]; then \
-      git fetch origin $TE_TAG && \
-      git checkout FETCH_HEAD; \
+    git fetch origin $TE_TAG && \
+    git checkout FETCH_HEAD; \
     fi && \
-    git submodule init && git submodule update && \
-    NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .
-
-# place any util pkgs here
-ARG PYTRITON_VERSION
-RUN pip install --upgrade-strategy only-if-needed nvidia-pytriton==$PYTRITON_VERSION
-ARG PROTOBUF_VERSION
-RUN pip install -U --no-deps protobuf==$PROTOBUF_VERSION
-RUN pip install --upgrade-strategy only-if-needed jsonlines
+    git submodule init && git submodule update
 
 # NeMo
 ARG NEMO_TAG
@@ -96,28 +86,26 @@
 RUN git clone https://github.com/NVIDIA/NeMo.git && \
     cd NeMo && \
     git pull && \
     if [ ! -z $NEMO_TAG ]; then \
-      git fetch origin $NEMO_TAG && \
-      git checkout FETCH_HEAD; \
-    fi && \
-    pip uninstall -y nemo_toolkit sacrebleu && \
-    pip install -e ".[nlp]" && \
-    cd nemo/collections/nlp/data/language_modeling/megatron && make
+    git fetch origin $NEMO_TAG && \
+    git checkout FETCH_HEAD; \
+    fi
 
 # MLM
-ARG MLM_TAG
+ARG MCORE_TAG
 RUN pip uninstall -y megatron-core && \
     git clone https://github.com/NVIDIA/Megatron-LM.git && \
     cd Megatron-LM && \
     git pull && \
-    if [ ! -z $MLM_TAG ]; then \
-      git fetch origin $MLM_TAG && \
-      git checkout FETCH_HEAD; \
-    fi && \
-    pip install -e .
+    if [ ! -z $MCORE_TAG ]; then \
+    git fetch origin $MCORE_TAG && \
+    git checkout FETCH_HEAD; \
+    fi
 
 COPY --from=aligner-bump /opt/NeMo-Aligner /opt/NeMo-Aligner
+ARG PYTRITON_VERSION
+ARG PROTOBUF_VERSION
 RUN cd /opt/NeMo-Aligner && \
-    pip install --no-deps -e .
+    NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .
 
 RUN cd TensorRT-LLM && patch -p1 < ../NeMo-Aligner/setup/trtllm.patch
diff --git a/nemo_aligner/utils/trt_llm.py b/nemo_aligner/utils/trt_llm.py
index 1f879064d..9f1a9345f 100644
--- a/nemo_aligner/utils/trt_llm.py
+++ b/nemo_aligner/utils/trt_llm.py
@@ -44,8 +44,9 @@ def append_and_repad_list(list_of_items, item_to_append, pad_id):
 
 
 class GPTGenerateTRTLLM:
-    # If a tokenizer does not have a pad_id, we use a large negative number and replace
-    # with self.eos_id after generation.
+    # Use a reserved negative number since there is variation between tokenizers if
+    # they (1) have a pad_id (2) don't have a pad_id or (3) have None as the pad_id.
+    # This pad_id is replaced with eos_id after generation.
     DEFAULT_PAD_ID = -42
 
     def __init__(
@@ -72,12 +73,6 @@ def __init__(
                 "You are trying to use NeMo-Aligner's TensorRT-LLM acceleration for LLM generation. Please build the dockerfile to enable this feature: https://github.com/NVIDIA/NeMo-Aligner/blob/main/Dockerfile"
             )
 
-        # If this assert turns out to be a blocker with some tokenizers, potential workarounds could be to:
-        # - add a config option to allow specifying which token we pass as `end_id` to TRT-LLM (should
-        #   be a token that the model is guaranteed to never generate)
-        assert (
-            tokenizer.pad_id != tokenizer.eos_id
-        ), f"We require tokenizers to have a different {tokenizer.pad_id=} than {tokenizer.eos_id=} when using TRT-LLM. This is to make sure all code goes into the same path and include the eos_id when the response lengths are computed"
         assert max_input_len > 0
         assert max_generation_length > 0
         assert (
@@ -104,7 +99,7 @@ def __init__(
         rng_generator.manual_seed(seed)
         self.rng_generator = rng_generator
 
-        self.pad_id = tokenizer.pad_id if tokenizer.pad_id is not None else GPTGenerateTRTLLM.DEFAULT_PAD_ID
+        self.pad_id = GPTGenerateTRTLLM.DEFAULT_PAD_ID
         self.eos_id = tokenizer.eos_id
 
         end_strings = list(end_strings)
diff --git a/pyproject.toml b/pyproject.toml
index 09462e9d9..0544c99c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,8 @@
 profile = "black" # black-compatible
 line_length = 119 # should match black parameters
 ignore_whitespace = true # ignore whitespace for compatibility with the initial style
-py_version = 38 # python 3.8 as a target version
+py_version = 310 # python 3.10 as a target version
+requires-python = ">=3.10"
 known_first_party = ["nemo", "nemo_aligner"] # FIRSTPARTY section
 known_third_party = ["examples"] # THIRDPARTY section
 sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
diff --git a/setup.py b/setup.py
index cc3801455..29ee9b4bf 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@
 import subprocess
 from distutils import cmd as distutils_cmd
 from distutils import log as distutils_log
-
+import re
 import setuptools
 
 spec = importlib.util.spec_from_file_location("package_info", "nemo_aligner/package_info.py")
@@ -62,13 +62,30 @@
 # Dependency Loading #
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+# Function to replace ${VAR} or ${VAR:-default_value} with environment variable or default
+def replace_env_vars(text):
+    # Regex to match ${VAR} or ${VAR:-default_value}
+    pattern = re.compile(r"\$\{(\w+)(:-([^}]*))?\}")
+
+    def replace_var(match):
+        var_name = match.group(1)  # The environment variable name
+        default_value = match.group(3)  # The default value if provided
+
+        # Return the environment variable value or the default (if available) or empty string
+        return os.environ.get(var_name, default_value if default_value is not None else f"${{{var_name}}}")
+
+    # Substitute all patterns in the text
+    return pattern.sub(replace_var, text)
+
 
 
 def req_file(filename, folder="requirements"):
     with open(os.path.join(folder, filename), encoding="utf-8") as f:
         content = f.readlines()
-    # you may also want to remove whitespace characters
-    # Example: `\n` at the end of each line
-    return [x.strip() for x in content if x.strip()]
+    requirements = [x.strip() for x in content]
+    requirements = [
+        replace_env_vars(line.strip()) for line in requirements if line.strip() and not line.startswith("#")
+    ]
+    return requirements
 
 
 install_requires = req_file("requirements.txt", folder="setup")
diff --git a/setup/requirements.txt b/setup/requirements.txt
index 198d2e07a..dcccd2245 100644
--- a/setup/requirements.txt
+++ b/setup/requirements.txt
@@ -1,4 +1,5 @@
+datasets>=3.0.1
 jsonlines
-megatron_core>=0.8
-nemo_toolkit[nlp]
-nvidia-pytriton
+nemo_toolkit[nlp] @ git+https://github.com/NVIDIA/NeMo.git@${NEMO_TAG}#egg=nemo_toolkit[nlp]
+nvidia-pytriton #==${PYTRITON_VERSION:-0.5.10}
+protobuf==${PROTOBUF_VERSION:-4.24.4}
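Note: below is a minimal standalone sketch of the ${VAR} / ${VAR:-default} substitution that the new replace_env_vars() helper in setup.py applies to each non-comment line of setup/requirements.txt. It reuses the regex and fallback from the hunk above; the resolve() wrapper and the inline prints are only for illustration, and the example requirement lines are taken from this diff.

import os
import re

# Same pattern as replace_env_vars() in setup.py: matches ${VAR} or ${VAR:-default_value}.
pattern = re.compile(r"\$\{(\w+)(:-([^}]*))?\}")


def resolve(line):
    def replace_var(match):
        var_name = match.group(1)       # environment variable name
        default_value = match.group(3)  # default after ':-', if provided
        # Environment wins, then the default; otherwise the ${VAR} token is left as-is.
        return os.environ.get(var_name, default_value if default_value is not None else f"${{{var_name}}}")

    return pattern.sub(replace_var, line)


# With PROTOBUF_VERSION unset in the environment, the ':-' default is used.
print(resolve("protobuf==${PROTOBUF_VERSION:-4.24.4}"))  # protobuf==4.24.4

# ${NEMO_TAG} has no default, so it only resolves when the variable is set
# (the Dockerfile's ARG NEMO_TAG is visible as an environment variable during the image build).
os.environ["NEMO_TAG"] = "ko3n1g/build/move-to-req"
print(resolve("nemo_toolkit[nlp] @ git+https://github.com/NVIDIA/NeMo.git@${NEMO_TAG}#egg=nemo_toolkit[nlp]"))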