
Commit 103512b

Add files via upload
1 parent 0701b5a commit 103512b

1 file changed: 42 additions, 0 deletions
@@ -0,0 +1,42 @@
import sys
import os
# Add current directory to Python path for local module imports
sys.path.insert(0, os.path.abspath("."))
from ms_mindnlp.transformers.models.llama.modeling_llama import LlamaModel
from ms_mindnlp.transformers.models.llama.configuration_llama import LlamaConfig
import mindspore as ms
from mindspore import dtype, ops
import debugpy

debugpy.listen(("0.0.0.0", 5678))
print("Waiting for debugger to attach...")

debugpy.wait_for_client()
print("Debugger is attached.")

# import inspect
# llama_config_file_path = inspect.getfile(LlamaConfig)
# print(f"{llama_config_file_path}")

ms.set_context(mode=ms.PYNATIVE_MODE)

def run():
    """Main execution function for LLaMA model inference demo"""
    config = LlamaConfig(
        vocab_size=32000,              # Tokenizer vocabulary size
        hidden_size=4096,              # Hidden layer dimension
        intermediate_size=11008,       # FFN layer inner dimension
        num_hidden_layers=2,           # Number of transformer blocks
        num_attention_heads=32,        # Parallel attention heads
        num_key_value_heads=2,         # KV heads for grouped-query attention
        max_position_embeddings=2048,  # Maximum sequence length
    )
    model = LlamaModel(config=config)
    # Generate random input tensor: (batch_size=2, seq_length=16)
    input_ids = ops.randint(0, config.vocab_size, (2, 16), dtype=dtype.int32)
    output = model(input_ids=input_ids)
    print("inference")
    print(output)

if __name__ == "__main__":
    run()
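
Usage note: once the script prints "Waiting for debugger to attach...", any debugpy-compatible client (for example, an IDE attach configuration pointed at port 5678) can connect; execution only continues past debugpy.wait_for_client() after a client attaches.

For reference, the head geometry implied by the LlamaConfig above works out as follows. This is a minimal arithmetic sketch using only the values set in the diff; the variable names here are illustrative and not part of the script:

# Illustrative arithmetic only, values copied from the LlamaConfig above.
hidden_size = 4096
num_attention_heads = 32
num_key_value_heads = 2

head_dim = hidden_size // num_attention_heads                # 4096 / 32 = 128 dims per head
queries_per_kv = num_attention_heads // num_key_value_heads  # 32 / 2 = 16 query heads share each KV head

print(head_dim, queries_per_kv)  # 128 16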
