Skip to content

Commit cdc8656

Browse files
committed
Mistral-24B: Evaluate performance metrics
1 parent 38608b3 commit cdc8656

File tree

7 files changed

+42
-3
lines changed

7 files changed

+42
-3
lines changed

.github/workflows/t3000-demo-tests-impl.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ jobs:
4040
# This requirements and comment removed when https://github.com/tenstorrent/tt-metal/pull/22608 merges.
4141
{ name: "t3k_qwen3_tests", arch: wormhole_b0, cmd: run_t3000_qwen3_tests, timeout: 60, owner_id: U03HY7MK4BT}, # Mark O'Connor
4242
{ name: "t3k_qwen25_vl_tests", arch: wormhole_b0, cmd: run_t3000_qwen25_vl_tests, timeout: 60, owner_id: U07RY6B5FLJ}, #Gongyu Wang
43+
# { name: "t3k_mistral_24b_tests", arch: wormhole_b0, cmd: run_t3000_mistral_24b_tests, timeout: 60, owner_id: }, # TODO: requires owner ID before this entry can be enabled
4344
]
4445

4546
name: ${{ matrix.test-group.name }}

models/tt_transformers/PERF.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ This configuration uses bfp4 MLP and bfp8 attention weights for all models excep
4545
| Mistral-7B | N150 | 95 | 99 | 29.75 | 100.24 |
4646
| Mistral-7B | N300 | 95 | 99 | 47.01 | 65.95 |
4747
| Mistral-7B | T3K | 95 | 99 | 67.82 | 53.93 |
48+
| Mistral-24B | T3K | 95 | 100 | 37.77 | 131.6 |
4849

4950

5051
## Accuracy
@@ -82,6 +83,7 @@ Llama 3 models test as insensitive to attention precision and so we use bfp8 att
8283
| Mistral-7B | N150 | 95 | 99 | 29.75 | 100.24 |
8384
| Mistral-7B | N300 | 95 | 99 | 47.01 | 65.95 |
8485
| Mistral-7B | T3K | 95 | 99 | 67.82 | 53.93 |
86+
| Mistral-24B | T3K | 95 | 100 | 33.27 | 130.39 |
8587

8688
## Long-context (64K Tokens)
8789

models/tt_transformers/demo/simple_text_demo.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,15 @@ def test_demo_text(
955955
)
956956

957957
# Benchmark targets
958-
supported_models = ["Llama-3.2-1B", "Llama-3.2-3B", "Llama-3.1-8B", "Llama-3.2-11B", "Llama-3.1-70B", "Mistral-7B"]
958+
supported_models = [
959+
"Llama-3.2-1B",
960+
"Llama-3.2-3B",
961+
"Llama-3.1-8B",
962+
"Llama-3.2-11B",
963+
"Llama-3.1-70B",
964+
"Mistral-7B",
965+
"Mistral-Small-3.1-24B",
966+
]
959967
supported_devices = ["N150", "P100", "P150", "P300", "N300", "P150x4", "P150x8", "T3K", "TG"]
960968

961969
tt_device_name = determine_device_name(mesh_device) # submesh device should not decide performance target
@@ -1004,6 +1012,7 @@ def test_demo_text(
10041012
"N300_Mistral-7B": 38, # TODO Update target
10051013
"T3K_Mistral-7B": 45, # TODO Update target
10061014
"TG_Mistral-7B": 45, # TODO Update target
1015+
"T3K_Mistral-Small-3.1-24B": 33, # TODO Update target
10071016
}
10081017
if model_device_key in dict_target_decode_tok_s_u:
10091018
target_decode_tok_s_u = dict_target_decode_tok_s_u[model_device_key]
@@ -1099,6 +1108,7 @@ def test_demo_text(
10991108
# "T3K_Qwen2.5-72B": 13, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24303)
11001109
"T3K_Qwen2.5-Coder-32B": 21,
11011110
# "T3K_Qwen3-32B": 20, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24303)
1111+
"T3K_Mistral-Small-3.1-24B": 33, # TODO Update target
11021112
}
11031113

11041114
# Only call verify_perf if the model_device_key exists in the targets

models/tt_transformers/demo/simple_vision_demo.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,19 +471,22 @@ def test_multimodal_demo_text(
471471
logger.info("")
472472

473473
logger.info(f"is_ci_env: {is_ci_env}")
474+
is_ci_env = True  # NOTE(review): debug override — unconditionally forces the CI-only profiling branch below; remove before merge
474475
if is_ci_env and max_batch_size == 1 and enable_trace: # Only profiling these parametrizations
475476
tt_device_name = model_args[0].device_name
476477
base_model_name = model_args[0].base_model_name
477478
target_prefill_tok_s = {
478479
"N300_Llama-3.2-11B": 23,
479480
"T3K_Llama-3.2-11B": 20,
480481
"T3K_Llama-3.2-90B": 3,
482+
"T3K_Mistral-Small-3.1-24B": 1254.50,
481483
}[f"{tt_device_name}_{base_model_name}"]
482484

483485
target_decode_tok_s_u = {
484486
"N300_Llama-3.2-11B": 21.5,
485487
"T3K_Llama-3.2-11B": 34.25,
486488
"T3K_Llama-3.2-90B": 6,
489+
"T3K_Mistral-Small-3.1-24B": 28.50,
487490
}[f"{tt_device_name}_{base_model_name}"]
488491

489492
target_decode_tok_s = target_decode_tok_s_u * max_batch_size
Binary file not shown.

models/tt_transformers/tt/model_config.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ def __init__(
567567
"Phi-3.5-mini-instruct": {"N150": 128, "N300": 128, "T3K": 128, "TG": 128, "P150x4": 128},
568568
"QwQ-32B": {"N150": None, "N300": None, "T3K": 64, "TG": 128, "P150x4": 128},
569569
"Qwen3-32B": {"N150": None, "N300": None, "T3K": 64, "TG": 128, "P150x4": 128},
570+
"Mistral-Small-3.1-24B": {"N150": None, "N300": None, "T3K": 128, "TG": 128, "P150x4": 128},
570571
}
571572
try:
572573
max_prefill_chunk_size_div1024 = MAX_PREFILL_CHUNK_SIZES_DIV1024[self.base_model_name][self.device_name]
@@ -1379,8 +1380,6 @@ def prepare_residual_tensor_prefill(self, x_bsh, force_replicated=False):
13791380

13801381
def _get_text_prefix(self):
13811382
if self.is_vision():
1382-
if "Mistral-Small-3.1-24B-Instruct-2503" in self.model_name:
1383-
return "language_model."
13841383
return "text_model."
13851384
else:
13861385
return ""

tests/scripts/t3000/run_t3000_demo_tests.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,30 @@ run_t3000_qwen25_vl_tests() {
121121
fi
122122
}
123123

124+
# Run the T3K Mistral-24B vision demo test and exit non-zero on failure.
# Mirrors the structure of the sibling run_t3000_*_tests helpers in this script.
run_t3000_mistral_24b_tests() {
  # Record the start time
  fail=0
  start_time=$(date +%s)

  echo "LOG_METAL: Running run_t3000_mistral_24b_tests"

  # NOTE(review): assigned but never used — sibling runners appear to pass this
  # via WH_ARCH_YAML= on the pytest invocation; confirm whether it should be
  # exported here too.
  wh_arch_yaml=wormhole_b0_80_arch_eth_dispatch.yaml

  # Mistral-24B
  mistral24b=/mnt/MLPerf/tt_dnn-models/mistral/Mistral-24B-Instruct
  mesh_device=T3K

  # Accumulate the exit status arithmetically. The original `fail+=$?` performs
  # string concatenation ("0" -> "00"/"01"), which only works by accident when
  # the values are later compared arithmetically.
  MESH_DEVICE=$mesh_device HF_MODEL=$mistral24b pytest -n auto models/tt_transformers/demo/simple_vision_demo.py -k "batch1-notrace" --timeout 1200; fail=$((fail + $?))
  echo "LOG_METAL: Mistral-24B tests for $mesh_device completed"

  # Record the end time
  end_time=$(date +%s)
  duration=$((end_time - start_time))
  echo "LOG_METAL: run_t3000_mistral_24b_tests $duration seconds to complete"
  if [[ $fail -ne 0 ]]; then
    exit 1
  fi
}
147+
124148
run_t3000_qwen3_tests() {
125149
# Record the start time
126150
fail=0

0 commit comments

Comments
 (0)