11 changes: 10 additions & 1 deletion torchprime/launcher/Dockerfile
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:experimental
 # Use torch_xla Python 3.10 as the base image
-FROM us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm_20250410
+FROM us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.7.0_3.10_tpuvm
 
 ARG USE_TRANSFORMERS=false
 ARG USE_LOCAL_WHEEL=false
@@ -24,6 +24,15 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python
 
 WORKDIR /workspaces
 
+# Install torchax
+RUN git clone --depth 1 https://github.com/pytorch/xla.git
+WORKDIR /workspaces/xla/torchax
+RUN pip install torch_xla[pallas] \
+  -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html \
+  -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
+RUN pip install -e .
+
+
 # Install torchprime
 # Optimization: we rerun `pip install -e .` only if `pyproject.toml` changes.
 # Copy only the installation-related files first to make Docker cache them separately.
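
Note: this pins the base image to the r2.7.0 release and layers an editable torchax checkout on top, with the pallas extra pulling JAX pieces from the nightly index. A minimal import smoke test run inside the built container can confirm the install resolved (a sketch, not part of the diff):

# Sketch: verify the editable torchax install inside the image.
import torch_xla
import torchax  # resolved from /workspaces/xla/torchax via `pip install -e .`

print(torch_xla.__version__)  # should report the 2.7 release pinned by the base image
print(torchax.__file__)  # should point into /workspaces/xla/torchax
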
4 changes: 2 additions & 2 deletions torchprime/torch_xla_models/offloading.py
@@ -44,7 +44,7 @@ def remat_all_and_offload_these_inputs(
   *,
   num_fwd_outputs,
   names_to_offload: Sequence[str],
-  static_lifetime_input_indices=None,
+  # static_lifetime_input_indices=None,
 ):
   """Partition the graph to rematerialize forward activations and offload
   forward inputs to host.
@@ -72,7 +72,7 @@ def remat_all_and_offload_these_inputs(
     joint_module,
     _joint_inputs,
     num_fwd_outputs=num_fwd_outputs,
-    static_lifetime_input_indices=static_lifetime_input_indices,
+    # static_lifetime_input_indices=static_lifetime_input_indices,
   )
   with torch.device(input_device):
     fw_example_args = _make_arguments(fwd)
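
For context, remat_all_and_offload_these_inputs is a partition function taking (joint_module, joint_inputs) plus keyword-only options, so callers typically bind the extra options up front and let the AOT autograd machinery supply the rest. A hypothetical wiring sketch, with invented activation names:

# Hypothetical wiring, not part of this diff: bind the keyword-only options so
# the result matches the (joint_module, joint_inputs, *, num_fwd_outputs)
# shape that partition functions are called with.
import functools

from torchprime.torch_xla_models import offloading

partition_fn = functools.partial(
  offloading.remat_all_and_offload_these_inputs,
  names_to_offload=["residual", "hidden_states"],  # invented names
)
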
4 changes: 2 additions & 2 deletions torchprime/torch_xla_models/remat_all.py
@@ -9,7 +9,7 @@ def remat_all_partition_fn(
   _joint_inputs,
   *,
   num_fwd_outputs,
-  static_lifetime_input_indices=None,
+  # static_lifetime_input_indices=None,
 ):
   """
   remat_all_partition_fn is a graph partition function that closely matches the
@@ -30,7 +30,7 @@ def remat_all_partition_fn(
     joint_module,
     _joint_inputs,
     num_fwd_outputs=num_fwd_outputs,
-    static_lifetime_input_indices=static_lifetime_input_indices,
+    # static_lifetime_input_indices=static_lifetime_input_indices,
   )
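
Both remat_all.py and offloading.py comment the kwarg out entirely, which drops static-lifetime handling even on builds that support it. An alternative sketch, assuming the underlying partitioner is torch's min_cut_rematerialization_partition (which accepts static_lifetime_input_indices only in newer releases), would forward the kwarg conditionally:

# Sketch under that assumption: forward static_lifetime_input_indices only
# when the installed torch's partitioner actually accepts it.
import inspect

from functorch.compile import min_cut_rematerialization_partition

def partition_compat(
  joint_module, joint_inputs, *, num_fwd_outputs, static_lifetime_input_indices=None
):
  kwargs = {"num_fwd_outputs": num_fwd_outputs}
  accepted = inspect.signature(min_cut_rematerialization_partition).parameters
  if "static_lifetime_input_indices" in accepted:
    kwargs["static_lifetime_input_indices"] = static_lifetime_input_indices
  return min_cut_rematerialization_partition(joint_module, joint_inputs, **kwargs)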


9 changes: 8 additions & 1 deletion torchprime/torch_xla_models/train.py
@@ -19,9 +19,9 @@
 import transformers
 from datasets import load_dataset
 from omegaconf import DictConfig, OmegaConf
+from packaging import version
 from torch import nn
 from torch.utils.data import DataLoader, Dataset, IterableDataset
-from torch_xla._internal.jax_workarounds import jax_env_context
 from torch_xla.distributed.fsdp import checkpoint_module
 from torch_xla.distributed.spmd.xla_sharding import apply_xla_patch_to_nn_linear
 from transformers import (
@@ -44,6 +44,13 @@
 from torchprime.torch_xla_models import offloading, remat_all, scan_layers
 from torchprime.torch_xla_models.topology import get_mesh, is_1d_sharding
 
+if version.parse(torch_xla.__version__.split("+")[0]) >= version.parse("2.8.0"):
+  from torch_xla._internal.jax_workarounds import jax_env_context
+else:
+  from torch_xla.experimental.custom_kernel import _jax_env_context as jax_env_context
+
+
+
 check_min_version("4.39.3")
 logger = logging.getLogger(__name__)
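
The gate picks the import path by release: torch_xla 2.8+ exposes jax_env_context under torch_xla._internal.jax_workarounds, while the 2.7 release only ships the private _jax_env_context in torch_xla.experimental.custom_kernel. Either way the name binds to a context manager; a usage sketch (illustrative only, the body is invented and the real call sites are not shown in this hunk):

# Illustrative sketch: scope JAX imports/initialization inside the manager
# so its environment workarounds apply only for that region.
with jax_env_context():
  import jax  # invented example body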
