import sys
import os

# Add current directory to Python path for local module imports
sys.path.insert(0, os.path.abspath("."))

from ms_mindnlp.transformers.models.llama.modeling_llama import LlamaModel
from ms_mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
import mindspore as ms
from mindspore import dtype, ops
import debugpy

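# Start a debugpy server and block until a remote debugger (e.g. a VS Code
# "attach" configuration on port 5678) connects, so breakpoints in the model code can be hit.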
debugpy.listen(("0.0.0.0", 5678))
print("Waiting for debugger to attach...")

debugpy.wait_for_client()
print("Debugger is attached.")

# import inspect
# llama_config_file_path = inspect.getfile(LlamaConfig)
# print(f"{llama_config_file_path}")

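# Run in PyNative (eager) mode so the forward pass executes op by op and is easy to debug.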
ms.set_context(mode=ms.PYNATIVE_MODE)

def run():
    """Main execution function for LLaMA model inference demo"""
    config = LlamaConfig(
        vocab_size=32000,              # Tokenizer vocabulary size
        hidden_size=4096,              # Hidden layer dimension
        intermediate_size=11008,       # FFN layer inner dimension
        num_hidden_layers=2,           # Number of transformer blocks
        num_attention_heads=32,        # Parallel attention heads
        num_key_value_heads=2,         # KV heads for grouped-query attention
        max_position_embeddings=2048,  # Maximum sequence length
    )
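    # Build a small 2-layer model with randomly initialized weights (no checkpoint is loaded).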
    model = LlamaModel(config=config)
    # Generate random input tensor: (batch_size=2, seq_length=16)
    input_ids = ops.randint(0, config.vocab_size, (2, 16), dtype=dtype.int32)
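    # Forward pass over the random token ids; a bare LlamaModel (no LM head) returns hidden states rather than logits.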
    output = model(input_ids=input_ids)
    print("inference")
    print(output)

if __name__ == "__main__":
    run()