From 93e7dfc22b3a289a13612a5e052e58b06c120f3a Mon Sep 17 00:00:00 2001 From: manfeibai Date: Fri, 14 Apr 2023 18:22:40 +0000 Subject: [PATCH 1/3] test pt-nightly-hf-mlm-roberta-b-pre-conv-v2-8-1vm without FUNCTIONALIZATION --- tests/pytorch/nightly/hf-lm.libsonnet | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/pytorch/nightly/hf-lm.libsonnet b/tests/pytorch/nightly/hf-lm.libsonnet index ed3b36488..687f283f3 100644 --- a/tests/pytorch/nightly/hf-lm.libsonnet +++ b/tests/pytorch/nightly/hf-lm.libsonnet @@ -201,6 +201,20 @@ local utils = import 'templates/utils.libsonnet'; |||, }, }, + local tpuVm_debug = self.tpuVm, + tpuVm:: common.PyTorchTpuVmMixin { + tpuSettings+: { + tpuVmExports+: ||| + export XLA_USE_BF16=$(XLA_USE_BF16) + |||, + tpuVmExtraSetup: ||| + pip install tensorboardX google-cloud-storage + echo 'export PATH=~/.local/bin:$PATH' >> ~/.bash_profile + echo 'export XLA_USE_BF16=1' >> ~/.bash_profile + echo 'export XLA_DISABLE_FUNCTIONALIZATION=1' >> ~/.bash_profile + |||, + }, + }, local v2_8 = self.v2_8, v2_8:: { accelerator: tpus.v2_8, @@ -210,7 +224,7 @@ local utils = import 'templates/utils.libsonnet'; accelerator: tpus.v3_8, }, configs: [ - hf_lm + v2_8 + roberta_base_pre + timeouts.Hours(5) + tpuVm, + hf_lm + v2_8 + roberta_base_pre + timeouts.Hours(5) + tpuVm_debug, hf_lm + v3_8 + roberta_base_fine + timeouts.Hours(3) + tpuVm, hf_lm + v3_8 + bert_base_pre + timeouts.Hours(6) + tpuVm, hf_lm + v3_8 + bert_base_fine + timeouts.Hours(5) + tpuVm, From fe2221845e727988b898bee280f57571a8dac619 Mon Sep 17 00:00:00 2001 From: manfeibai Date: Fri, 14 Apr 2023 18:33:06 +0000 Subject: [PATCH 2/3] comment --- tests/pytorch/nightly/hf-lm.libsonnet | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/pytorch/nightly/hf-lm.libsonnet b/tests/pytorch/nightly/hf-lm.libsonnet index 687f283f3..11fa84031 100644 --- a/tests/pytorch/nightly/hf-lm.libsonnet +++ b/tests/pytorch/nightly/hf-lm.libsonnet @@ -201,6 +201,8 @@ local utils = import 'templates/utils.libsonnet'; |||, }, }, + // create a new config for tpuvm, but modified it to run on tpuvm + // without functionalization by `export XLA_DISABLE_FUNCTIONALIZATION=1` local tpuVm_debug = self.tpuVm, tpuVm:: common.PyTorchTpuVmMixin { tpuSettings+: { @@ -224,6 +226,7 @@ local utils = import 'templates/utils.libsonnet'; accelerator: tpus.v3_8, }, configs: [ + // only test pt-nightly-hf-mlm-roberta-b-pre-conv-v2-8-1vm without functionalization hf_lm + v2_8 + roberta_base_pre + timeouts.Hours(5) + tpuVm_debug, hf_lm + v3_8 + roberta_base_fine + timeouts.Hours(3) + tpuVm, hf_lm + v3_8 + bert_base_pre + timeouts.Hours(6) + tpuVm, From e5a02f9050b9e3031957468d4640584c7ba8f94b Mon Sep 17 00:00:00 2001 From: manfeibai Date: Fri, 14 Apr 2023 22:56:59 +0000 Subject: [PATCH 3/3] delet duplicate --- tests/pytorch/nightly/hf-lm.libsonnet | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/tests/pytorch/nightly/hf-lm.libsonnet b/tests/pytorch/nightly/hf-lm.libsonnet index 11fa84031..d5696010f 100644 --- a/tests/pytorch/nightly/hf-lm.libsonnet +++ b/tests/pytorch/nightly/hf-lm.libsonnet @@ -188,22 +188,8 @@ local utils = import 'templates/utils.libsonnet'; }, }, }, + // update config for tpuvm to run without functionalization by `export XLA_DISABLE_FUNCTIONALIZATION=1` local tpuVm = self.tpuVm, - tpuVm:: common.PyTorchTpuVmMixin { - tpuSettings+: { - tpuVmExports+: ||| - export XLA_USE_BF16=$(XLA_USE_BF16) - |||, - tpuVmExtraSetup: ||| - pip install tensorboardX google-cloud-storage - echo 'export PATH=~/.local/bin:$PATH' >> ~/.bash_profile - echo 'export XLA_USE_BF16=1' >> ~/.bash_profile - |||, - }, - }, - // create a new config for tpuvm, but modified it to run on tpuvm - // without functionalization by `export XLA_DISABLE_FUNCTIONALIZATION=1` - local tpuVm_debug = self.tpuVm, tpuVm:: common.PyTorchTpuVmMixin { tpuSettings+: { tpuVmExports+: ||| @@ -226,8 +212,8 @@ local utils = import 'templates/utils.libsonnet'; accelerator: tpus.v3_8, }, configs: [ - // only test pt-nightly-hf-mlm-roberta-b-pre-conv-v2-8-1vm without functionalization - hf_lm + v2_8 + roberta_base_pre + timeouts.Hours(5) + tpuVm_debug, + // want to test pt-nightly-hf-mlm-roberta-b-pre-conv-v2-8-1vm without functionalization + hf_lm + v2_8 + roberta_base_pre + timeouts.Hours(5) + tpuVm, hf_lm + v3_8 + roberta_base_fine + timeouts.Hours(3) + tpuVm, hf_lm + v3_8 + bert_base_pre + timeouts.Hours(6) + tpuVm, hf_lm + v3_8 + bert_base_fine + timeouts.Hours(5) + tpuVm,