From 4f5f2f81b091b5acd23838f49a27e5ffd9b4debc Mon Sep 17 00:00:00 2001 From: jennychristopher Date: Fri, 18 Jul 2025 15:14:55 +0000 Subject: [PATCH 1/2] Add DeepSeek-R1-Distill-Qwen-14B as a supported model in tt-transformers --- models/tt_transformers/tt/model_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/models/tt_transformers/tt/model_config.py b/models/tt_transformers/tt/model_config.py index affdee74a2ce..8a815785c216 100644 --- a/models/tt_transformers/tt/model_config.py +++ b/models/tt_transformers/tt/model_config.py @@ -566,6 +566,7 @@ def __init__( "Phi-3.5-mini-instruct": {"N150": 128, "N300": 128, "T3K": 128, "TG": 128, "P150x4": 128}, "QwQ-32B": {"N150": None, "N300": None, "T3K": 64, "TG": 128, "P150x4": 128}, "Qwen3-32B": {"N150": None, "N300": None, "T3K": 64, "TG": 128, "P150x4": 128}, + "DeepSeek-R1-Distill-Qwen-14B": {"N150": 4, "N300": 64, "T3K": 128, "TG": None, "P150x4": None}, } try: max_prefill_chunk_size_div1024 = MAX_PREFILL_CHUNK_SIZES_DIV1024[self.base_model_name][self.device_name] From 5a061e34d78413787828b9a782b7b4b599a6dace Mon Sep 17 00:00:00 2001 From: Jenny Date: Mon, 25 Aug 2025 08:08:58 +0530 Subject: [PATCH 2/2] Add DeepSeek-R1-Distill-Qwen-14B to CI --- .github/workflows/single-card-demo-tests-impl.yaml | 1 + tests/scripts/single_card/run_single_card_demo_tests.sh | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/.github/workflows/single-card-demo-tests-impl.yaml b/.github/workflows/single-card-demo-tests-impl.yaml index 8346a0269fe7..67d0b251896d 100644 --- a/.github/workflows/single-card-demo-tests-impl.yaml +++ b/.github/workflows/single-card-demo-tests-impl.yaml @@ -115,6 +115,7 @@ jobs: # # Moved to t3k tests until OOM on single card runners resolved # { name: "qwen7b", runner-label: "N300", performance: false, cmd: run_qwen7b_func, owner_id: U03PUAKE719}, # Mark O'Connor { name: "qwen25_vl", runner-label: "N300", performance: true, cmd: run_qwen25_vl_func, owner_id: U07RY6B5FLJ}, #Gongyu Wang + { name: 
"deepseek_r1_distill_qwen", runner-label: "N300", performance: true, cmd: run_deepseek_r1_distill_qwen_14b_func}, ] ] steps: diff --git a/tests/scripts/single_card/run_single_card_demo_tests.sh b/tests/scripts/single_card/run_single_card_demo_tests.sh index 705176a960b8..4b689072d7c6 100755 --- a/tests/scripts/single_card/run_single_card_demo_tests.sh +++ b/tests/scripts/single_card/run_single_card_demo_tests.sh @@ -21,6 +21,12 @@ run_qwen7b_func() { } +run_deepseek_r1_distill_qwen_14b_func() { + + HF_MODEL=/mnt/MLPerf/tt_dnn-models/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B MESH_DEVICE=N300 pytest -n auto models/tt_transformers/demo/simple_text_demo.py -k performance-ci-1 --timeout 1800 + +} + run_qwen25_vl_func() { fail=0