33 changes: 33 additions & 0 deletions ais_bench/benchmark/configs/models/mf_models/mf_model.py
@@ -0,0 +1,33 @@
from ais_bench.benchmark.models.local_models.mindformers_model import MindFormerModel

models = [
dict(
attr="local", # local or service
type=MindFormerModel, # use this for transformers < 4.33.0: try AutoModelForCausalLM.from_pretrained first, fall back to AutoModel.from_pretrained on failure
Copilot AI Feb 11, 2026
The inline comment for type=MindFormerModel mentions Transformers (<4.33.0) and HuggingFace loading behavior, which is unrelated to MindFormers and can confuse users. Please update the comment to describe MindFormers-specific behavior/requirements (e.g., that it uses yaml_cfg_file + MindFormers checkpoint loading).

Suggested change
type=MindFormerModel, # use this for transformers < 4.33.0: try AutoModelForCausalLM.from_pretrained first, fall back to AutoModel.from_pretrained on failure
type=MindFormerModel, # MindFormers model type; the model is loaded from the yaml_cfg_file configuration together with a MindFormers checkpoint

abbr='mindformer-model',
path='THUDM/chatglm-6b', # path to model dir, current value is just an example
checkpoint = 'THUDM/your_checkpoint', # path to checkpoint file, current value is just an example
yaml_cfg_file = 'THUDM/your.yaml',
tokenizer_path='THUDM/chatglm-6b', # path to tokenizer dir, current value is just an example
model_kwargs=dict( # model arguments, see huggingface.co/docs/transformers/v4.50.0/en/model_doc/auto#transformers.AutoModel.from_pretrained
device_map='npu',
),
tokenizer_kwargs=dict( # tokenizer arguments, see huggingface.co/docs/transformers/v4.50.0/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase
padding_side='right',
),
generation_kwargs = dict( # generation arguments, see huggingface.co/docs/transformers/main_classes/text_generation
temperature = 0.5,
top_k = 10,
top_p = 0.95,
do_sample = True,
seed = None,
repetition_penalty = 1.03,
),
run_cfg = dict(num_gpus=1, num_procs=1), # multi-GPU / multi-node settings; the task is launched with torchrun
Copilot AI Feb 11, 2026
run_cfg comment says the task is launched with torchrun, but MindFormerModel.launcher is set to msrun and OpenICLInferTask now supports msrun. Please update this comment to match the actual launcher to avoid misconfiguration.

Suggested change
run_cfg = dict(num_gpus=1, num_procs=1), # multi-GPU / multi-node settings; the task is launched with torchrun
run_cfg = dict(num_gpus=1, num_procs=1), # multi-GPU / multi-node settings; the task is launched with msrun

max_out_len=100, # maximum output length in tokens
batch_size=2, # batch size used when splitting the dataset
build_batch_size = 2, # batch size used when building the static-graph model; build_batch_size >= batch_size
max_seq_len=2048,
batch_padding=True,
)
]
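For orientation, a minimal sketch of how a type-based entry like the one above is typically turned into a model instance. This is hypothetical and not the actual ais_bench loader; runner-only keys such as attr, abbr, run_cfg, max_out_len and batch_size are assumed to be consumed by the framework before the model class is constructed.

# Hypothetical sketch of registry-style config consumption (not ais_bench code).
cfg = dict(models[0])            # copy the entry defined above
model_cls = cfg.pop('type')      # e.g. MindFormerModel
for key in ('attr', 'abbr', 'run_cfg', 'max_out_len', 'batch_size', 'model_kwargs'):
    cfg.pop(key, None)           # assumed to be handled by the runner, not the model
model = model_cls(**cfg)         # path, checkpoint, yaml_cfg_file, tokenizer_*, generation_kwargs, ...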
2 changes: 1 addition & 1 deletion ais_bench/benchmark/models/__init__.py
@@ -14,4 +14,4 @@
from ais_bench.benchmark.models.api_models.triton_api import TritonCustomAPIStream # noqa: F401
from ais_bench.benchmark.models.api_models.tgi_api import TGICustomAPIStream # noqa: F401
from ais_bench.benchmark.models.api_models.vllm_custom_api_chat import VllmMultiturnAPIChatStream # noqa: F401
from ais_bench.benchmark.models.local_models.vllm_offline_vl import VLLMOfflineVLModel
from ais_bench.benchmark.models.local_models.vllm_offline_vl import VLLMOfflineVLModel
1 change: 1 addition & 0 deletions ais_bench/benchmark/models/local_models/base.py
@@ -35,6 +35,7 @@ class BaseModel:
"""

is_api: bool = False
launcher: str = "torchrun"

def __init__(self,
path: str,
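For context: this launcher class attribute defaults local models to torchrun, and MindFormerModel below overrides it to msrun. A minimal sketch, using a hypothetical helper rather than the actual OpenICLInferTask code, of dispatching the launch command on that attribute:

def build_launch_command(model_cls, num_procs: int, script: str) -> list:
    # Hypothetical helper: pick the distributed launcher declared by the model class.
    launcher = getattr(model_cls, 'launcher', 'torchrun')
    if launcher == 'msrun':
        # MindSpore's msrun launcher takes the worker count via --worker_num.
        return ['msrun', f'--worker_num={num_procs}', script]
    # Default PyTorch launcher.
    return ['torchrun', f'--nproc_per_node={num_procs}', script]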
316 changes: 316 additions & 0 deletions ais_bench/benchmark/models/local_models/mindformers_model.py
@@ -0,0 +1,316 @@
import os, sys
from typing import Dict, List, Optional, Union
Copilot AI Feb 11, 2026
Import of 'Union' is not used.

Suggested change
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional


import numpy as np
import torch
Comment on lines +1 to +5
Copilot AI Feb 11, 2026
os, sys, and torch are imported but never used in this module, which adds noise and can introduce unnecessary dependency constraints. Please remove unused imports to keep the wrapper minimal.

Suggested change
import os, sys
from typing import Dict, List, Optional, Union
import numpy as np
import torch
from typing import Dict, List, Optional, Union
import numpy as np

import transformers

from ais_bench.benchmark.models.local_models.base import BaseModel
from ais_bench.benchmark.models import APITemplateParser
Copilot AI Feb 11, 2026
APITemplateParser is imported but not used. If MindFormerModel should use a template parser, wire it up explicitly; otherwise remove the import to avoid confusion.

Suggested change
from ais_bench.benchmark.models import APITemplateParser

from ais_bench.benchmark.registry import MODELS
try:
from mindspore import Tensor, Model
from mindformers import MindFormerConfig, build_context
from mindformers.models import build_network
from mindformers.core.parallel_config import build_parallel_config
from mindformers.utils.load_checkpoint_utils import get_load_path_after_hf_convert
from mindformers.trainer.utils import transform_and_load_checkpoint
except ModuleNotFoundError as _mf_err:
MIND_FORMERS_INSTALL_GUIDE = (
"MindSpore/MindFormers is not available in the current environment. "
"To use `MindFormerModel`, install MindSpore and MindFormers and ensure any required "
"environment variables for your device are configured.\n\n"
"Quick steps (examples):\n"
" 1) Follow the official MindSpore installation guide for your platform: "
"https://www.mindspore.cn/install/en.\n"
" 2) Install MindFormers: `pip install mindformers` (or follow its docs); "
"alternatively, add a local MindFormers checkout to your PYTHONPATH, e.g.:\n"
" PYTHONPATH=/path/to/mindformers:$PYTHONPATH\n"
)
raise ModuleNotFoundError(MIND_FORMERS_INSTALL_GUIDE) from _mf_err


class MultiTokenEOSCriteria(transformers.StoppingCriteria):
"""Criteria to stop on the specified multi-token sequence."""

def __init__(
self,
sequence: str,
tokenizer: transformers.PreTrainedTokenizer,
batch_size: int,
):
self.done_tracker = [False] * batch_size
self.sequence = sequence
self.sequence_ids = tokenizer.encode(sequence,
add_special_tokens=False)
self.sequence_id_len = len(self.sequence_ids)
self.tokenizer = tokenizer

def __call__(self, input_ids, scores, **kwargs) -> bool:
# compare the last len(stop) tokens
lookback_ids_batch = input_ids[:, -self.sequence_id_len:]
lookback_tokens_batch = self.tokenizer.batch_decode(lookback_ids_batch)
for i, done in enumerate(self.done_tracker):
if done:
continue
self.done_tracker[i] = self.sequence in lookback_tokens_batch[i]
return False not in self.done_tracker


def drop_error_generation_kwargs(generation_kwargs: dict) -> dict:
for key in ['is_synthetic', 'batch_size', 'do_performance']:
if key in generation_kwargs:
generation_kwargs.pop(key)
return generation_kwargs


@MODELS.register_module()
class MindFormerModel(BaseModel):

launcher: str = "msrun"

def __init__(self,
path: str,
checkpoint: Optional[str] = None,
yaml_cfg_file: Optional[str] = None,
build_batch_size: int = 1,
max_seq_len: int = 2048,
tokenizer_path: Optional[str] = None,
tokenizer_kwargs: dict = dict(),
tokenizer_only: bool = False,
generation_kwargs: dict = dict(),
meta_template: Optional[Dict] = None,
extract_pred_after_decode: bool = False,
batch_padding: bool = False,
pad_token_id: Optional[int] = None,
mode: str = 'none',
use_fastchat_template: bool = False,
end_str: Optional[str] = None,
**kwargs):
super().__init__(path=path,
max_seq_len=max_seq_len,
tokenizer_only=tokenizer_only,
meta_template=meta_template)
self.batch_size = build_batch_size
self.pad_token_id = pad_token_id
self.pretrained_model_path = path
if mode not in ['none', 'mid']:
raise ValueError(f"mode must be 'none' or 'mid', but got {mode}")
self.mode = mode
if not yaml_cfg_file:
raise ValueError('`yaml_cfg_file` is required for MindFormerModel')
self.config = MindFormerConfig(yaml_cfg_file)
self.checkpoint = checkpoint
self._load_tokenizer(path=path,
tokenizer_path=tokenizer_path,
tokenizer_kwargs=tokenizer_kwargs)
self.batch_padding = batch_padding
self.extract_pred_after_decode = extract_pred_after_decode
if not tokenizer_only:
self._load_model(self.config, self.batch_size, self.max_seq_len)
self.generation_kwargs = generation_kwargs
self.use_fastchat_template = use_fastchat_template
self.end_str = end_str

def _load_tokenizer(self, path: str, tokenizer_path: Optional[str],
tokenizer_kwargs: dict):
from transformers import AutoTokenizer, GenerationConfig

DEFAULT_TOKENIZER_KWARGS = dict(padding_side='left', truncation_side='left', trust_remote_code=True)
kwargs = DEFAULT_TOKENIZER_KWARGS.copy()
kwargs.update(tokenizer_kwargs)

load_path = tokenizer_path if tokenizer_path else path
self.tokenizer = AutoTokenizer.from_pretrained(load_path, **kwargs)

pad_token_id = self.pad_token_id

# A patch for some models without pad_token_id
if pad_token_id is not None:
if self.tokenizer.pad_token_id is None:
self.logger.debug(f'Using {pad_token_id} as pad_token_id')
elif self.tokenizer.pad_token_id != pad_token_id:
self.logger.warning(f'pad_token_id is not consistent. Using {pad_token_id} as pad_token_id')
self.tokenizer.pad_token_id = pad_token_id
return
if self.tokenizer.pad_token_id is not None:
return
self.logger.warning('pad_token_id is not set for the tokenizer.')

try:
generation_config = GenerationConfig.from_pretrained(path)
except Exception:
generation_config = None

if generation_config and generation_config.pad_token_id is not None:
self.logger.warning(f'Using {generation_config.pad_token_id} as pad_token_id.')
self.tokenizer.pad_token_id = generation_config.pad_token_id
return
if self.tokenizer.eos_token_id is not None:
self.logger.warning(f'Using eos_token_id {self.tokenizer.eos_token_id} as pad_token_id.')
self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
return
raise ValueError('pad_token_id is not set for this tokenizer. Please set `pad_token_id={PAD_TOKEN_ID}` in model_cfg.')

def _set_config_from_yaml(self):
if self.checkpoint is not None:
self.config.load_checkpoint = self.checkpoint
elif self.checkpoint is None and self.config.load_checkpoint is None:
self.config.load_checkpoint = self.path
self.config.model.pretrained_model_dir = self.pretrained_model_path
self.config.model.model_config.seq_length = self.max_seq_len
build_context(self.config)
build_parallel_config(self.config)

def _load_model(self, config, batch_size, max_seq_len):

self._set_config_from_yaml()
try:
self.model = build_network(
config.model,
default_args={
"parallel_config": config.parallel_config,
"moe_config": config.moe_config
})
self.logger.info("..........Network Built Successfully..........")
self.model.set_train(False)
config.load_checkpoint = get_load_path_after_hf_convert(config, self.model)
self.logger.info(f"load checkpoint path : {config.load_checkpoint}")
run_mode = config.get("run_mode", None)
if run_mode == "predict":
self.model.load_weights(config.load_checkpoint)
else:
model = Model(self.model)
input_ids = Tensor(np.ones((batch_size, max_seq_len), dtype=np.int32))
infer_data = self.model.prepare_inputs_for_predict_layout(input_ids)
transform_and_load_checkpoint(config, model, self.model, infer_data, do_eval=True)

self.logger.info("..........Checkpoint Load Successfully..........")
except ValueError as e:
raise ValueError('Failed to load MindFormers model, please check configuration') from e


def generate(self,
inputs: List[str],
max_out_len: int,
min_out_len: Optional[int] = None,
stopping_criteria: List[str] = [],
**kwargs) -> List[str]:
"""Generate results given a list of inputs.

Args:
inputs (List[str]): A list of strings.
max_out_len (int): The maximum length of the output.
min_out_len (Optional[int]): The minimum length of the output.

Returns:
List[str]: A list of generated strings.
"""
generation_kwargs = kwargs.copy()
generation_kwargs.update(self.generation_kwargs)

messages = list(inputs)
batch_size = len(messages)
prompt_char_lens = None

if self.extract_pred_after_decode:
prompt_char_lens = [len(text) for text in messages]

if self.use_fastchat_template:
try:
from fastchat.model import get_conversation_template
except ModuleNotFoundError:
raise ModuleNotFoundError(
'Fastchat is not installed. You can use '
"'pip install \"fschat[model_worker,webui]\"' "
'to install fastchat.')
for idx, text in enumerate(messages):
conv = get_conversation_template('vicuna')
conv.append_message(conv.roles[0], text)
conv.append_message(conv.roles[1], None)
messages[idx] = conv.get_prompt()
if self.mode == 'mid':
assert len(messages) == 1
tokens = self.tokenizer(messages, padding=False, truncation=False, return_tensors='np')
input_ids = tokens['input_ids']
if input_ids.shape[-1] > self.max_seq_len:
input_ids = np.concatenate([input_ids[:, : self.max_seq_len // 2], input_ids[:, - self.max_seq_len // 2:]], axis=-1)
Comment on lines +233 to +237
Copilot AI Feb 11, 2026
Using assert len(messages) == 1 for input validation is fragile because assertions can be disabled with Python optimizations, and it produces an unhelpful failure for users. Please raise a regular exception (e.g., ValueError/ParameterValueError) with a clear message when mode == 'mid' is used with batch inputs.

Suggested change
assert len(messages) == 1
tokens = self.tokenizer(messages, padding=False, truncation=False, return_tensors='np')
input_ids = tokens['input_ids']
if input_ids.shape[-1] > self.max_seq_len:
input_ids = np.concatenate([input_ids[:, : self.max_seq_len // 2], input_ids[:, - self.max_seq_len // 2:]], axis=-1)
if len(messages) != 1:
raise ValueError(
f"mode 'mid' does not support batch inputs: expected 1 message, got {len(messages)}."
)
tokens = self.tokenizer(messages, padding=False, truncation=False, return_tensors='np')
input_ids = tokens['input_ids']
if input_ids.shape[-1] > self.max_seq_len:
input_ids = np.concatenate(
[input_ids[:, : self.max_seq_len // 2], input_ids[:, - self.max_seq_len // 2:]],
axis=-1,
)

tokens = {'input_ids': input_ids}
else:
tokenize_kwargs = dict(
padding=True,
truncation=True,
max_length=self.max_seq_len,
return_tensors='np'
)
tokens = self.tokenizer(messages, **tokenize_kwargs)

input_ids = tokens['input_ids']
if len(messages) > 1:
attention_mask = tokens.get('attention_mask')
prompt_token_lens = (
attention_mask.sum(axis=1).astype(int).tolist()
if attention_mask is not None else
[input_ids.shape[1]] * batch_size
)
else:
prompt_token_lens = [len(ids) for ids in input_ids]

input_ids_tensor = Tensor(input_ids)

if min_out_len is not None:
generation_kwargs['min_new_tokens'] = min_out_len
generation_kwargs['max_new_tokens'] = max_out_len
generation_kwargs.setdefault('top_k', 1)
generation_kwargs.setdefault('return_dict_in_generate', False)

origin_stopping_criteria = list(stopping_criteria)
if stopping_criteria:
if self.tokenizer.eos_token is not None:
stopping_criteria = stopping_criteria + [
self.tokenizer.eos_token
]
stopping_list = transformers.StoppingCriteriaList([
*[
MultiTokenEOSCriteria(sequence, self.tokenizer,
input_ids_tensor.shape[0])
for sequence in stopping_criteria
],
])
generation_kwargs['stopping_criteria'] = stopping_list

generation_kwargs = drop_error_generation_kwargs(generation_kwargs)

outputs = self.model.generate(input_ids=input_ids_tensor,
**generation_kwargs)

if isinstance(outputs, dict):
outputs = outputs.get('sequences', outputs)
if outputs is None:
raise ValueError("Model output dictionary is missing 'sequence' key.")
Copilot AI Feb 11, 2026
The error message says the output dict is missing 'sequence', but the code actually looks for the 'sequences' key. This mismatch will mislead debugging; update the message (or the key) so they are consistent.

Suggested change
raise ValueError("Model output dictionary is missing 'sequence' key.")
raise ValueError("Model output dictionary is missing 'sequences' key.")


sequences = [seq.tolist() for seq in outputs]

if not self.extract_pred_after_decode:
sequences = [
seq[prompt_len:]
for seq, prompt_len in zip(sequences, prompt_token_lens)
]

decodeds = [
self.tokenizer.decode(seq, skip_special_tokens=True)
for seq in sequences
]

if self.extract_pred_after_decode and prompt_char_lens is not None:
decodeds = [
text[length:]
for text, length in zip(decodeds, prompt_char_lens)
]

if self.end_str:
decodeds = [text.split(self.end_str)[0] for text in decodeds]
if origin_stopping_criteria:
for token in origin_stopping_criteria:
decodeds = [text.split(token)[0] for text in decodeds]
return decodeds
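A side note on the mode == 'mid' branch above: when a prompt exceeds max_seq_len, the middle of the sequence is dropped and only the first and last max_seq_len // 2 tokens are kept. A self-contained numpy sketch of that truncation:

import numpy as np

max_seq_len = 8
input_ids = np.arange(12)[None, :]  # one prompt of 12 token ids, longer than max_seq_len
if input_ids.shape[-1] > max_seq_len:
    # Keep the first max_seq_len // 2 and the last max_seq_len // 2 tokens.
    input_ids = np.concatenate(
        [input_ids[:, : max_seq_len // 2], input_ids[:, -max_seq_len // 2:]],
        axis=-1,
    )
print(input_ids)  # [[ 0  1  2  3  8  9 10 11]]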