diff --git a/README.md b/README.md index 7c14a058..84ea28db 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,13 @@ xmake && xmake install - 运行模型推理测试 ```bash -python scripts/jiuge.py [--cpu | --nvidia | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] path/to/model_dir [n_device] +python scripts/jiuge.py [--cpu | --nvidia | --qy | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] path/to/model_dir [n_device] ``` - 部署模型推理服务 ```bash -python scripts/launch_server.py --model-path MODEL_PATH [-h] [--dev {cpu,nvidia,cambricon,ascend,metax,moore,iluvatar,kunlun,hygon}] [--ndev NDEV] [--max-batch MAX_BATCH] [--max-tokens MAX_TOKENS] +python scripts/launch_server.py --model-path MODEL_PATH [-h] [--dev {cpu,nvidia,qy,cambricon,ascend,metax,moore,iluvatar,kunlun,hygon}] [--ndev NDEV] [--max-batch MAX_BATCH] [--max-tokens MAX_TOKENS] ``` - 测试模型推理服务性能 diff --git a/scripts/jiuge.py b/scripts/jiuge.py index 7c31baf8..a234f5e9 100644 --- a/scripts/jiuge.py +++ b/scripts/jiuge.py @@ -84,7 +84,7 @@ def match(state_dict): class JiugeMetaFromLlama(JiugeMetaCStruct): - def __init__(self, config, dtype=torch.float16, max_tokens=None): + def __init__(self, config, dtype=torch.bfloat16, max_tokens=None): if dtype == torch.float16: dt_ = DataType.INFINI_DTYPE_F16 elif dtype == torch.float32: @@ -146,7 +146,7 @@ def __init__( meta, naming, state_dict, - torch_dt_mat=torch.float16, + torch_dt_mat=torch.bfloat16, torch_dt_norm=torch.float32, ndev=1, transpose_weight=True, @@ -825,7 +825,7 @@ def destroy_model_instance(self): def test(): if len(sys.argv) < 3: print( - "Usage: python jiuge.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] [n_device] [--verbose]" + "Usage: python jiuge.py [--cpu | --nvidia | --qy | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] [n_device] [--verbose]" ) sys.exit(1) @@ -844,6 +844,8 @@ def test(): device_type = 
DeviceType.DEVICE_TYPE_CPU elif sys.argv[1] == "--nvidia": device_type = DeviceType.DEVICE_TYPE_NVIDIA + elif sys.argv[1] == "--qy": + device_type = DeviceType.DEVICE_TYPE_QY elif sys.argv[1] == "--cambricon": device_type = DeviceType.DEVICE_TYPE_CAMBRICON elif sys.argv[1] == "--ascend": @@ -860,7 +862,7 @@ def test(): device_type = DeviceType.DEVICE_TYPE_HYGON else: print( - "Usage: python jiuge.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] [n_device] [--verbose]" + "Usage: python jiuge.py [--cpu | --nvidia | --qy | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] [n_device] [--verbose]" ) sys.exit(1) diff --git a/scripts/jiuge_ppl.py b/scripts/jiuge_ppl.py index 061ab303..923d209c 100644 --- a/scripts/jiuge_ppl.py +++ b/scripts/jiuge_ppl.py @@ -7,6 +7,7 @@ DEVICE_TYPE_MAP = { "cpu": DeviceType.DEVICE_TYPE_CPU, "nvidia": DeviceType.DEVICE_TYPE_NVIDIA, + "qy": DeviceType.DEVICE_TYPE_QY, "cambricon": DeviceType.DEVICE_TYPE_CAMBRICON, "ascend": DeviceType.DEVICE_TYPE_ASCEND, "metax": DeviceType.DEVICE_TYPE_METAX, @@ -19,6 +20,7 @@ TORCH_DEVICE_TYPE_MAP = { "cpu": "cpu", "nvidia": "cuda", + "qy": "cuda", "cambricon": "mlu", "ascend": "npu", "metax": "cuda", diff --git a/scripts/launch_server.py b/scripts/launch_server.py index 2d231b49..659163c6 100644 --- a/scripts/launch_server.py +++ b/scripts/launch_server.py @@ -20,6 +20,7 @@ DEVICE_TYPE_MAP = { "cpu": DeviceType.DEVICE_TYPE_CPU, "nvidia": DeviceType.DEVICE_TYPE_NVIDIA, + "qy": DeviceType.DEVICE_TYPE_QY, "cambricon": DeviceType.DEVICE_TYPE_CAMBRICON, "ascend": DeviceType.DEVICE_TYPE_ASCEND, "metax": DeviceType.DEVICE_TYPE_METAX, diff --git a/scripts/libinfinicore_infer/base.py b/scripts/libinfinicore_infer/base.py index bed65b2e..3305cdba 100644 --- a/scripts/libinfinicore_infer/base.py +++ b/scripts/libinfinicore_infer/base.py @@ -36,6 +36,7 @@ class DeviceType(ctypes.c_int): DEVICE_TYPE_ILUVATAR = 6 DEVICE_TYPE_KUNLUN = 7 
DEVICE_TYPE_HYGON = 8 + DEVICE_TYPE_QY = 9 class KVCacheCStruct(ctypes.Structure):