Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/t3000-demo-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
# This requirements and comment removed when https://github.com/tenstorrent/tt-metal/pull/22608 merges.
{ name: "t3k_qwen3_tests", arch: wormhole_b0, cmd: run_t3000_qwen3_tests, timeout: 60, owner_id: U03HY7MK4BT}, # Mark O'Connor
{ name: "t3k_qwen25_vl_tests", arch: wormhole_b0, cmd: run_t3000_qwen25_vl_tests, timeout: 60, owner_id: U07RY6B5FLJ}, #Gongyu Wang
# { name: "t3k_mistral_24b_tests", arch: wormhole_b0, cmd: run_t3000_mistral_24b_tests, timeout: 60, owner_id: }, # TODO: Requires owner ID
]

name: ${{ matrix.test-group.name }}
Expand Down
2 changes: 2 additions & 0 deletions models/tt_transformers/PERF.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ This configuration uses bfp4 MLP and bfp8 attention weights for all models excep
| Mistral-7B | N150 | 95 | 99 | 29.75 | 100.24 |
| Mistral-7B | N300 | 95 | 99 | 47.01 | 65.95 |
| Mistral-7B | T3K | 95 | 99 | 67.82 | 53.93 |
| Mistral-24B | T3K | 95 | 100 | 37.77 | 131.6 |


## Accuracy
Expand Down Expand Up @@ -82,6 +83,7 @@ Llama 3 models test as insensitive to attention precision and so we use bfp8 att
| Mistral-7B | N150 | 95 | 99 | 29.75 | 100.24 |
| Mistral-7B | N300 | 95 | 99 | 47.01 | 65.95 |
| Mistral-7B | T3K | 95 | 99 | 67.82 | 53.93 |
| Mistral-24B | T3K | 95 | 100 | 33.27 | 130.39 |

## Long-context (64K Tokens)

Expand Down
12 changes: 11 additions & 1 deletion models/tt_transformers/demo/simple_text_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,15 @@ def test_demo_text(
)

# Benchmark targets
supported_models = ["Llama-3.2-1B", "Llama-3.2-3B", "Llama-3.1-8B", "Llama-3.2-11B", "Llama-3.1-70B", "Mistral-7B"]
supported_models = [
"Llama-3.2-1B",
"Llama-3.2-3B",
"Llama-3.1-8B",
"Llama-3.2-11B",
"Llama-3.1-70B",
"Mistral-7B",
"Mistral-Small-3.1-24B",
]
supported_devices = ["N150", "P100", "P150", "P300", "N300", "P150x4", "P150x8", "T3K", "TG"]

tt_device_name = determine_device_name(mesh_device) # submesh device should not decide performance target
Expand Down Expand Up @@ -1004,6 +1012,7 @@ def test_demo_text(
"N300_Mistral-7B": 38, # TODO Update target
"T3K_Mistral-7B": 45, # TODO Update target
"TG_Mistral-7B": 45, # TODO Update target
"T3K_Mistral-Small-3.1-24B": 33, # TODO Update target
}
if model_device_key in dict_target_decode_tok_s_u:
target_decode_tok_s_u = dict_target_decode_tok_s_u[model_device_key]
Expand Down Expand Up @@ -1099,6 +1108,7 @@ def test_demo_text(
# "T3K_Qwen2.5-72B": 13, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24303)
"T3K_Qwen2.5-Coder-32B": 21,
# "T3K_Qwen3-32B": 20, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24303)
"T3K_Mistral-Small-3.1-24B": 33, # TODO Update target
}

# Only call verify_perf if the model_device_key exists in the targets
Expand Down
3 changes: 3 additions & 0 deletions models/tt_transformers/demo/simple_vision_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,19 +471,22 @@ def test_multimodal_demo_text(
logger.info("")

logger.info(f"is_ci_env: {is_ci_env}")
is_ci_env = True
if is_ci_env and max_batch_size == 1 and enable_trace: # Only profiling these parametrizations
tt_device_name = model_args[0].device_name
base_model_name = model_args[0].base_model_name
target_prefill_tok_s = {
"N300_Llama-3.2-11B": 23,
"T3K_Llama-3.2-11B": 20,
"T3K_Llama-3.2-90B": 3,
"T3K_Mistral-Small-3.1-24B": 1254.50,
}[f"{tt_device_name}_{base_model_name}"]

target_decode_tok_s_u = {
"N300_Llama-3.2-11B": 21.5,
"T3K_Llama-3.2-11B": 34.25,
"T3K_Llama-3.2-90B": 6,
"T3K_Mistral-Small-3.1-24B": 28.50,
}[f"{tt_device_name}_{base_model_name}"]

target_decode_tok_s = target_decode_tok_s_u * max_batch_size
Expand Down
Binary file not shown.
24 changes: 24 additions & 0 deletions tests/scripts/t3000/run_t3000_demo_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,30 @@ run_t3000_qwen25_vl_tests() {
fi
}

run_t3000_mistral_24b_tests() {
# Run the Mistral-24B vision demo on a T3K mesh and report the wall-clock
# duration; exits nonzero if the pytest invocation fails.
fail=0
# Record the start time
start_time=$(date +%s)

echo "LOG_METAL: Running run_t3000_mistral_24b_tests"

# NOTE(review): wh_arch_yaml is assigned but not exported/used below —
# confirm whether the pytest run needs WH_ARCH_YAML=$wh_arch_yaml like
# other wormhole test helpers, or whether this line can be dropped.
wh_arch_yaml=wormhole_b0_80_arch_eth_dispatch.yaml

# Mistral-24B
# Path to pre-downloaded model weights on the shared MLPerf mount.
mistral24b=/mnt/MLPerf/tt_dnn-models/mistral/Mistral-24B-Instruct
mesh_device=T3K

# Run only the batch-1, no-trace parametrization of the vision demo.
# NOTE: fail+=$? appends the exit code as a string digit (e.g. "0" -> "00"
# or "01") rather than summing; the numeric -ne 0 check below still
# detects any nonzero exit, matching the other run_t3000_* helpers here.
MESH_DEVICE=$mesh_device HF_MODEL=$mistral24b pytest -n auto models/tt_transformers/demo/simple_vision_demo.py -k "batch1-notrace" --timeout 1200; fail+=$?
echo "LOG_METAL: Mistral-24B tests for $mesh_device completed"

# Record the end time
end_time=$(date +%s)
duration=$((end_time - start_time))
echo "LOG_METAL: run_t3000_mistral_24b_tests $duration seconds to complete"
if [[ $fail -ne 0 ]]; then
exit 1
fi
}

run_t3000_qwen3_tests() {
# Record the start time
fail=0
Expand Down