Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/single-card-demo-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ jobs:
{ name: "qwen25_vl", runner-label: "N300", performance: true, cmd: run_qwen25_vl_func, owner_id: U07RY6B5FLJ}, # Gongyu Wang
{ name: "gemma3", runner-label: "N150", performance: true, cmd: run_gemma3_perf, owner_id: U08TJ70UFRT}, # Harry Andrews
{ name: "gemma3", runner-label: "N300", performance: true, cmd: run_gemma3_perf, owner_id: U08TJ70UFRT}, # Harry Andrews
# { name: "gemma3_1b", runner-label: "N150", performance: true, cmd: run_gemma3_1b_func, owner_id: }, # TODO Owner ID needs to be updated
# { name: "gemma3_4b", runner-label: "N300", performance: true, cmd: run_gemma3_4b_func, owner_id: }, # TODO Owner ID needs to be updated
]]

steps:
- name: Compute tests
shell: bash
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/t3000-demo-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ jobs:
{ name: "t3k_qwen3_tests", arch: wormhole_b0, cmd: run_t3000_qwen3_tests, timeout: 60, owner_id: U03HY7MK4BT}, # Mark O'Connor
{ name: "t3k_qwen25_vl_tests", arch: wormhole_b0, cmd: run_t3000_qwen25_vl_tests, timeout: 30, owner_id: U07RY6B5FLJ}, # Gongyu Wang
{ name: "t3k_gemma3_tests", arch: wormhole_b0, cmd: run_t3000_gemma3_tests, timeout: 30, owner_id: U08TJ70UFRT}, # Harry Andrews
# { name: "t3k_gemma_3_27b_tests", arch: wormhole_b0, cmd: run_t3000_gemma_3_27b_tests, timeout: 60, owner_id: }, # TODO: Requires owner ID
]

name: ${{ matrix.test-group.name }}
Expand Down
9 changes: 8 additions & 1 deletion models/tt_transformers/PERF.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ This configuration uses bfp4 MLP and bfp8 attention weights for all models excep
| Phi-3-mini-128k-instruct | N150 | 89 | 99 | 45.0 | 73.32 |
| Phi-3-mini-128k-instruct | N300 | 89 | 99 | 60.87 | 114.94 |
| Mixtral-8x7B-v0.1 | T3K | 95 | 100 | 67.82 | 53.93 |
| gemma-3-1b | N150 | 83 | 95 | 58.93 | 61.95 |
| gemma-3-4b | N150 | 86 | 97 | 36.19 | 64.96 |
| gemma-3-4b | N300 | 86 | 98 | 37.4 | 120.38 |
| gemma-3-27b | T3K | 91 | 99 | 16.73 | 356.81 |

## Accuracy

Expand Down Expand Up @@ -92,6 +95,10 @@ Llama 3 models test as insensitive to attention precision and so we use bfp8 att
| Phi-3-mini-128k-instruct | N150 | 94 | 99 | 40.41 | 82.58 |
| Phi-3-mini-128k-instruct | N300 | 94 | 99 | 57.0 | 115.36 |
| Mixtral-8x7B-v0.1 | T3K | 95 | 100 | 67.82 | 53.93 |
| gemma-3-1b | N150 | 92 | 99 | 56.32 | 52.57 |
| gemma-3-4b | N150 | 90 | 100 | 31.65 | 77.72 |
| gemma-3-4b | N300 | 90 | 99 | 34.94 | 142.08 |
| gemma-3-27b | T3K | 96 | 100 | 15.74 | 359.51 |

## Long-context (64K Tokens)

Expand Down
28 changes: 27 additions & 1 deletion models/tt_transformers/demo/simple_text_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,17 @@ def test_demo_text(
)

# Benchmark targets
supported_models = ["Llama-3.2-1B", "Llama-3.2-3B", "Llama-3.1-8B", "Llama-3.2-11B", "Llama-3.1-70B", "Mistral-7B"]
supported_models = [
"Llama-3.2-1B",
"Llama-3.2-3B",
"Llama-3.1-8B",
"Llama-3.2-11B",
"Llama-3.1-70B",
"Mistral-7B",
"gemma-3-1b",
"gemma-3-4b",
"gemma-3-27b",
]
supported_devices = ["N150", "P100", "P150", "P300", "N300", "P150x4", "P150x8", "T3K", "TG"]

tt_device_name = determine_device_name(mesh_device) # submesh device should not decide performance target
Expand Down Expand Up @@ -1097,6 +1107,13 @@ def test_demo_text(
"N300_Mistral-7B": 38, # TODO Update target
"T3K_Mistral-7B": 45, # TODO Update target
"TG_Mistral-7B": 45, # TODO Update target
#
"N150_gemma-3-1b": 20, # TODO Update target
#
"N150_gemma-3-4b": 11, # TODO Update target
"N300_gemma-3-4b": 12, # TODO Update target
#
"T3K_gemma-3-27b": 10, # TODO Update target
}
if model_device_key in dict_target_decode_tok_s_u:
target_decode_tok_s_u = dict_target_decode_tok_s_u[model_device_key]
Expand Down Expand Up @@ -1189,27 +1206,36 @@ def test_demo_text(
"N150_Llama-3.2-3B": 62,
"N150_Llama-3.1-8B": 120,
"N150_Mistral-7B": 106,
"N150_Qwen2.5-7B": 60,
"N150_gemma-3-1b": 62, # TODO Update target
"N150_gemma-3-4b": 65, # TODO Update target
# N300 targets
"N300_Qwen2.5-7B": 90,
"N300_gemma-3-4b": 125, # TODO Update target
# T3K targets
"T3K_Llama-3.1-70B": 204,
"T3K_Qwen2.5-Coder-32B": 173, # `f10cs08`
"T3K_Qwen2.5-72B": 240,
"T3K_Qwen3-32B": 166.5,
"T3K_gemma-3-27b": 330, # TODO Update target
}
ci_target_decode_tok_s_u = {
# N150 targets - higher is better
"N150_Llama-3.2-1B": 66,
"N150_Llama-3.2-3B": 35,
"N150_Llama-3.1-8B": 21,
"N150_Mistral-7B": 23,
"N150_gemma-3-1b": 20, # TODO Update target
"N150_gemma-3-4b": 11, # TODO Update target
# N300 targets
"N300_Qwen2.5-7B": 22.8,
"N300_gemma-3-4b": 35, # TODO Update target
# T3K targets
"T3K_Llama-3.1-70B": 15,
"T3K_Qwen2.5-72B": 13.25,
"T3K_Qwen2.5-Coder-32B": 21,
"T3K_Qwen3-32B": 21,
"T3K_gemma-3-27b": 15, # TODO Update target
}

# Only call verify_perf if the model_device_key exists in the targets
Expand Down
6 changes: 6 additions & 0 deletions models/tt_transformers/demo/simple_vision_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,12 +499,18 @@ def test_multimodal_demo_text(

run_config = (tt_device_name, base_model_name, max_batch_size)
targets_prefill_tok_s = {
("N150", "gemma-3-4b", 1): 265,
("N300", "Llama-3.2-11B", 16): 22.4,
("N300", "gemma-3-4b", 1): 350,
("T3K", "Llama-3.2-90B", 1): 15.3,
("T3K", "gemma-3-27b", 1): 250,
}
targets_decode_tok_s_u = {
("N150", "gemma-3-4b", 1): 20,
("N300", "Llama-3.2-11B", 16): 17,
("N300", "gemma-3-4b", 1): 20,
("T3K", "Llama-3.2-90B", 1): 4.3,
("T3K", "gemma-3-27b", 1): 12,
}

perf_targets = {}
Expand Down
Empty file modified models/tt_transformers/tests/generate_reference_hf.py
100644 → 100755
Empty file.
Binary file not shown.
Loading