diff --git a/README.md b/README.md index b33dced9..fbe76dc3 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ python3 ./tmatmulk.py > ./tmatmulk.pto python3 test/npu_validation/scripts/generate_testcase.py \ --input test/samples/Abs/abs-pto.cpp \ --run-mode npu \ - --soc-version Ascend910B1 + --pto-arch a3 # 2) 运行验证(run.sh 无需额外参数) test/samples/Abs/npu_validation/run.sh diff --git a/include/PTO/IR/PTOOps.td b/include/PTO/IR/PTOOps.td index 267297e2..6d243352 100644 --- a/include/PTO/IR/PTOOps.td +++ b/include/PTO/IR/PTOOps.td @@ -3133,18 +3133,17 @@ def TScatterOp: PTO_TOp<"tscatter", [ let extraClassDeclaration = [{ ::mlir::pto::PIPE getPipe() { - // NOTE: On dav-c220 (Ascend910 A2/A3), pto-isa implements TSCATTER as a + // NOTE: On A2/A3 (--pto-arch=a3), pto-isa implements TSCATTER as a // scalar loop over UB pointers, which executes on the scalar pipeline // (PIPE_S). Waiting on PIPE_V does not block scalar UB accesses and can // lead to using uninitialized indices/data (crash / aivec exception). // - // On A5 instruction set devices, TSCATTER is implemented with vector - // scatter instructions and should be treated as PIPE_V. + // On A5 instruction set devices (--pto-arch=a5), TSCATTER is implemented + // with vector scatter instructions and should be treated as PIPE_V. auto moduleOp = getOperation()->getParentOfType<::mlir::ModuleOp>(); if (moduleOp) { - if (auto spec = moduleOp->getAttrOfType<::mlir::StringAttr>("pto.device-spec")) { - auto s = spec.getValue(); - if (s.starts_with("Ascend950") || s.starts_with("Ascend910_95")) { + if (auto arch = moduleOp->getAttrOfType<::mlir::StringAttr>("pto.target_arch")) { + if (arch.getValue().equals_insensitive("a5")) { return ::mlir::pto::PIPE::PIPE_V; } } diff --git a/test/npu_validation/scripts/generate_testcase.py b/test/npu_validation/scripts/generate_testcase.py index 217a4eaf..a149e090 100644 --- a/test/npu_validation/scripts/generate_testcase.py +++ b/test/npu_validation/scripts/generate_testcase.py @@ -332,11 +332,11 @@ def _inject_packed_pred_mask_preload( return kernel_text[:insert_at] + block + kernel_text[insert_at:] -def _infer_aicore_arch(kernel_text: str, soc_version: str) -> str: +def _infer_aicore_arch(kernel_text: str, pto_arch: Optional[str]) -> str: # Heuristic: kernels that touch cube/L0/L1 tile types or cbuf memories need # the "cube" arch; pure vector kernels can use the vector arch. # - # IMPORTANT: the default arch depends on the Ascend SoC. + # IMPORTANT: the default arch depends on the target architecture. cube_markers = ( "TileType::Mat", "TileType::Left", @@ -354,19 +354,25 @@ def _infer_aicore_arch(kernel_text: str, soc_version: str) -> str: ) needs_cube = any(m in kernel_text for m in cube_markers) - sv = (soc_version or "").lower() - if "950" in sv or "a5" in sv: - # Ascend950 (A5) uses A5 instruction set. pto-isa examples build A5 - # kernels with dav-c310-{vec|cube}. + arch = (pto_arch or "").strip().lower() + if arch == "a5": + # A5 uses A5 instruction set. pto-isa examples build A5 kernels with + # dav-c310-{vec|cube}. return "dav-c310-cube" if needs_cube else "dav-c310-vec" - if "910b" in sv: - # Ascend910B* (e.g. Ascend910B1) uses dav-c310 toolchain arch. - return "dav-c310-cube" if needs_cube else "dav-c310-vec" - - # Default to Ascend910 (dav-c220) when SoC is unknown. + if arch == "a3": + # A2/A3 uses dav-c220 toolchain arch. + return "dav-c220-cube" if needs_cube else "dav-c220-vec" + # Default to Ascend910 (dav-c220) when arch is unknown. return "dav-c220-cube" if needs_cube else "dav-c220-vec" +def _soc_version_for_arch(arch: Optional[str]) -> str: + a = (arch or "").strip().lower() + if a == "a5": + return "Ascend910_9599" + return "Ascend910B1" + + def _parse_int_list(blob: str): items = [] for part in blob.split(","): @@ -811,7 +817,7 @@ def generate_testcase( output_root: Optional[Path], testcase: str, run_mode: str, - soc_version: str, + pto_arch: Optional[str] = None, aicore_arch: Optional[str] = None, ): sample_dir = input_cpp.parent @@ -837,15 +843,15 @@ def generate_testcase( # may be unavailable; build with a vector arch and explicitly enable the # section macros instead. if has_dav_cube or has_dav_vec: - sv = (soc_version or "").lower() - if "950" in sv or "a5" in sv: - aicore_arch = "dav-c310-vec" - elif "910b" in sv: + arch = (pto_arch or "").strip().lower() + if arch == "a5": aicore_arch = "dav-c310-vec" + elif arch == "a3": + aicore_arch = "dav-c220-vec" else: aicore_arch = "dav-c220-vec" else: - aicore_arch = _infer_aicore_arch(raw_kernel, soc_version) + aicore_arch = _infer_aicore_arch(raw_kernel, pto_arch) # Force-define DAV section macros so both sections are compiled into the # same binary. This keeps the generated validation executable self-contained @@ -1195,10 +1201,10 @@ def generate_testcase( (output_dir / "launch.cpp").write_text(launch_cpp, encoding="utf-8") # pto-isa selects instruction implementations based on MEMORY_BASE vs - # REGISTER_BASE. Ascend A5 (e.g. Ascend950) and Ascend910B use REGISTER_BASE. + # REGISTER_BASE. A5 uses REGISTER_BASE. mem_base_define = "MEMORY_BASE" - sv = (soc_version or "").lower() - if "910b" in sv or "950" in sv or "a5" in sv: + arch = (pto_arch or "").strip().lower() + if arch == "a5": mem_base_define = "REGISTER_BASE" # CCE printing support is gated behind `--cce-enable-print` on some bisheng @@ -1225,8 +1231,14 @@ def generate_testcase( set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -if(NOT DEFINED SOC_VERSION) - set(SOC_VERSION Ascend910) +if(NOT DEFINED PTO_ARCH) + set(PTO_ARCH a3) +endif() +string(TOLOWER "${PTO_ARCH}" PTO_ARCH_LC) +if(PTO_ARCH_LC STREQUAL "a5") + set(SIM_SOC_DIR Ascend910_9599) +else() + set(SIM_SOC_DIR Ascend910B1) endif() option(ENABLE_SIM_GOLDEN "Build Ascend simulator (camodel) executable" ON) @@ -1333,9 +1345,9 @@ def generate_testcase( ) target_link_directories({testcase}_sim PUBLIC ${{ASCEND_HOME_PATH}}/lib64 - ${{ASCEND_HOME_PATH}}/aarch64-linux/simulator/${{SOC_VERSION}}/lib - ${{ASCEND_HOME_PATH}}/simulator/${{SOC_VERSION}}/lib - ${{ASCEND_HOME_PATH}}/tools/simulator/${{SOC_VERSION}}/lib + ${{ASCEND_HOME_PATH}}/aarch64-linux/simulator/${{SIM_SOC_DIR}}/lib + ${{ASCEND_HOME_PATH}}/simulator/${{SIM_SOC_DIR}}/lib + ${{ASCEND_HOME_PATH}}/tools/simulator/${{SIM_SOC_DIR}}/lib ) target_link_libraries({testcase}_sim PRIVATE {testcase}_kernel @@ -1390,10 +1402,11 @@ def generate_testcase( encoding="utf-8", ) + arch_for_runsh = (pto_arch or "a3").strip().lower() run_sh = (templates_root / "run_sh_template.sh").read_text(encoding="utf-8") run_sh = run_sh.replace("@EXECUTABLE@", testcase) run_sh = run_sh.replace("@RUN_MODE@", run_mode) - run_sh = run_sh.replace("@SOC_VERSION@", soc_version) + run_sh = run_sh.replace("@PTO_ARCH@", arch_for_runsh) run_path = output_dir / "run.sh" run_path.write_text(run_sh, encoding="utf-8") run_path.chmod(0o755) @@ -1405,7 +1418,7 @@ def main(): parser.add_argument("--testcase", default=None, help="Testcase name (default: derived from input filename)") parser.add_argument("--output-root", default=None, help="Output testcases root directory") parser.add_argument("--run-mode", default="npu", choices=["sim", "npu"], help="Run mode for run.sh") - parser.add_argument("--soc-version", default="Ascend910", help="SOC version for run.sh") + parser.add_argument("--pto-arch", default="a3", help="Target PTO arch (a3 or a5).") parser.add_argument( "--aicore-arch", default=None, @@ -1420,7 +1433,7 @@ def main(): output_root, testcase, args.run_mode, - args.soc_version, + pto_arch=args.pto_arch, aicore_arch=args.aicore_arch, ) diff --git a/test/npu_validation/scripts/run_remote_npu_validation.sh b/test/npu_validation/scripts/run_remote_npu_validation.sh index 43f766dd..0a9c1e24 100644 --- a/test/npu_validation/scripts/run_remote_npu_validation.sh +++ b/test/npu_validation/scripts/run_remote_npu_validation.sh @@ -3,7 +3,7 @@ set -euo pipefail STAGE="${STAGE:-run}" # build|run RUN_MODE="${RUN_MODE:-npu}" # npu|sim -SOC_VERSION="${SOC_VERSION:-Ascend910}" +PTO_ARCH="${PTO_ARCH:-a3}" GOLDEN_MODE="${GOLDEN_MODE:-npu}" # sim|npu|skip PTO_ISA_REPO="${PTO_ISA_REPO:-https://github.com/PTO-ISA/pto-isa.git}" PTO_ISA_COMMIT="${PTO_ISA_COMMIT:-}" @@ -24,7 +24,7 @@ fi log() { echo "[$(date +'%F %T')] $*"; } log "=== Remote NPU Validation ===" -log "STAGE=${STAGE} RUN_MODE=${RUN_MODE} SOC_VERSION=${SOC_VERSION}" +log "STAGE=${STAGE} RUN_MODE=${RUN_MODE} PTO_ARCH=${PTO_ARCH}" log "GOLDEN_MODE=${GOLDEN_MODE}" log "DEVICE_ID=${DEVICE_ID}" log "PTO_ISA_REPO=${PTO_ISA_REPO}" @@ -121,24 +121,23 @@ fi export LD_LIBRARY_PATH="${ASCEND_HOME_PATH}/lib64:${LD_LIBRARY_PATH:-}" -# Some CANN installs do not provide a simulator directory named exactly -# "Ascend910". Map it to a real directory so we can link/run camodel. -SIM_SOC_VERSION="${SOC_VERSION}" -if [[ "${SOC_VERSION}" == "Ascend910" ]]; then - if [[ -d "${ASCEND_HOME_PATH}/aarch64-linux/simulator/Ascend910A/lib" ]]; then - SIM_SOC_VERSION="Ascend910A" - elif [[ -d "${ASCEND_HOME_PATH}/aarch64-linux/simulator/Ascend910ProA/lib" ]]; then - SIM_SOC_VERSION="Ascend910ProA" - fi -fi -log "SIM_SOC_VERSION=${SIM_SOC_VERSION}" +pto_arch_lc="$(printf '%s' "${PTO_ARCH}" | tr '[:upper:]' '[:lower:]')" +case "${pto_arch_lc}" in + a5) SIM_SOC_DIR="Ascend910_9599" ;; + a3) SIM_SOC_DIR="Ascend910B1" ;; + *) + SIM_SOC_DIR="Ascend910B1" + pto_arch_lc="a3" + ;; +esac +log "SIM_SOC_DIR=${SIM_SOC_DIR}" LD_LIBRARY_PATH_NPU="${LD_LIBRARY_PATH}" LD_LIBRARY_PATH_SIM="${LD_LIBRARY_PATH}" for d in \ - "${ASCEND_HOME_PATH}/aarch64-linux/simulator/${SIM_SOC_VERSION}/lib" \ - "${ASCEND_HOME_PATH}/simulator/${SIM_SOC_VERSION}/lib" \ - "${ASCEND_HOME_PATH}/tools/simulator/${SIM_SOC_VERSION}/lib"; do + "${ASCEND_HOME_PATH}/aarch64-linux/simulator/${SIM_SOC_DIR}/lib" \ + "${ASCEND_HOME_PATH}/simulator/${SIM_SOC_DIR}/lib" \ + "${ASCEND_HOME_PATH}/tools/simulator/${SIM_SOC_DIR}/lib"; do [[ -d "$d" ]] && LD_LIBRARY_PATH_SIM="$d:${LD_LIBRARY_PATH_SIM}" done @@ -216,7 +215,7 @@ while IFS= read -r -d '' cpp; do --testcase "${testcase}" \ --output-root "${OUTPUT_ROOT}" \ --run-mode "${RUN_MODE}" \ - --soc-version "${SIM_SOC_VERSION}" + --pto-arch "${PTO_ARCH}" gen_rc=$? set -euo pipefail if [[ $gen_rc -ne 0 ]]; then @@ -236,7 +235,7 @@ while IFS= read -r -d '' cpp; do enable_sim_golden="OFF" [[ "${GOLDEN_MODE}" == "sim" ]] && enable_sim_golden="ON" cmake -S . -B ./build \ - -DSOC_VERSION="${SIM_SOC_VERSION}" \ + -DPTO_ARCH="${PTO_ARCH}" \ -DENABLE_SIM_GOLDEN="${enable_sim_golden}" \ -DPTO_ISA_ROOT="${PTO_ISA_ROOT}" cmake --build ./build --parallel diff --git a/test/npu_validation/templates/run_sh_template.sh b/test/npu_validation/templates/run_sh_template.sh index 5c31a286..7e9597f6 100644 --- a/test/npu_validation/templates/run_sh_template.sh +++ b/test/npu_validation/templates/run_sh_template.sh @@ -2,7 +2,15 @@ set -euo pipefail RUN_MODE="@RUN_MODE@" -SOC_VERSION="@SOC_VERSION@" +PTO_ARCH="${PTO_ARCH:-@PTO_ARCH@}" +if [[ -z "${PTO_ARCH}" || "${PTO_ARCH}" == "@PTO_ARCH@" ]]; then + PTO_ARCH="a3" +fi + +case "${PTO_ARCH,,}" in + a5) SIM_SOC_DIR="Ascend910_9599" ;; + *) SIM_SOC_DIR="Ascend910B1" ;; +esac GOLDEN_MODE="${GOLDEN_MODE:-npu}" # sim|npu|skip BUILD_DIR="${BUILD_DIR:-build}" @@ -56,19 +64,10 @@ fi LD_LIBRARY_PATH_NPU="${LD_LIBRARY_PATH:-}" LD_LIBRARY_PATH_SIM="${LD_LIBRARY_PATH_NPU}" if [[ -n "${ASCEND_HOME_PATH:-}" ]]; then - SIM_SOC_VERSION="${SOC_VERSION}" - if [[ "${SOC_VERSION}" == "Ascend910" ]]; then - if [[ -d "${ASCEND_HOME_PATH}/aarch64-linux/simulator/Ascend910A/lib" ]]; then - SIM_SOC_VERSION="Ascend910A" - elif [[ -d "${ASCEND_HOME_PATH}/aarch64-linux/simulator/Ascend910ProA/lib" ]]; then - SIM_SOC_VERSION="Ascend910ProA" - fi - fi - for d in \ - "${ASCEND_HOME_PATH}/aarch64-linux/simulator/${SIM_SOC_VERSION}/lib" \ - "${ASCEND_HOME_PATH}/simulator/${SIM_SOC_VERSION}/lib" \ - "${ASCEND_HOME_PATH}/tools/simulator/${SIM_SOC_VERSION}/lib"; do + "${ASCEND_HOME_PATH}/aarch64-linux/simulator/${SIM_SOC_DIR}/lib" \ + "${ASCEND_HOME_PATH}/simulator/${SIM_SOC_DIR}/lib" \ + "${ASCEND_HOME_PATH}/tools/simulator/${SIM_SOC_DIR}/lib"; do [[ -d "$d" ]] && LD_LIBRARY_PATH_SIM="$d:${LD_LIBRARY_PATH_SIM}" done fi @@ -78,9 +77,9 @@ cd "${ROOT_DIR}/${BUILD_DIR}" ENABLE_SIM_GOLDEN="OFF" [[ "${GOLDEN_MODE}" == "sim" ]] && ENABLE_SIM_GOLDEN="ON" if [[ -n "${PTO_ISA_ROOT:-}" ]]; then - cmake -DSOC_VERSION="${SIM_SOC_VERSION:-${SOC_VERSION}}" -DENABLE_SIM_GOLDEN="${ENABLE_SIM_GOLDEN}" -DPTO_ISA_ROOT="${PTO_ISA_ROOT}" .. + cmake -DPTO_ARCH="${PTO_ARCH}" -DENABLE_SIM_GOLDEN="${ENABLE_SIM_GOLDEN}" -DPTO_ISA_ROOT="${PTO_ISA_ROOT}" .. else - cmake -DSOC_VERSION="${SIM_SOC_VERSION:-${SOC_VERSION}}" -DENABLE_SIM_GOLDEN="${ENABLE_SIM_GOLDEN}" .. + cmake -DPTO_ARCH="${PTO_ARCH}" -DENABLE_SIM_GOLDEN="${ENABLE_SIM_GOLDEN}" .. fi make -j diff --git a/test/samples/Bf16/bf16_tile.py b/test/samples/Bf16/bf16_tile.py index a3962d2a..1be1ff43 100644 --- a/test/samples/Bf16/bf16_tile.py +++ b/test/samples/Bf16/bf16_tile.py @@ -18,7 +18,7 @@ def build(): pto.register_dialect(ctx, load=True) module = builtin.ModuleOp() - module.attributes["pto.device-spec"] = StringAttr.get("Ascend910B1") + module.attributes["pto.target_arch"] = StringAttr.get("a3") bf16 = BF16Type.get() ptr_bf16 = pto.PtrType.get(bf16) diff --git a/test/samples/MatMul/0.pto b/test/samples/MatMul/0.pto index 83d7c2ae..76db1894 100644 --- a/test/samples/MatMul/0.pto +++ b/test/samples/MatMul/0.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @RunTMATMULSplitK(%arg0: !pto.ptr, %arg1: !pto.ptr, %arg2: !pto.ptr, %arg3: !pto.ptr, %arg4: i1) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/test/samples/MatMul/tmatmulk.pto b/test/samples/MatMul/tmatmulk.pto index 0ccaf8a6..905b911c 100644 --- a/test/samples/MatMul/tmatmulk.pto +++ b/test/samples/MatMul/tmatmulk.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @RunTMATMULSplitK(%arg0: !pto.ptr, %arg1: !pto.ptr, %arg2: !pto.ptr, %arg3: !pto.ptr, %arg4: i1) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/test/samples/MatMul/tmatmulk.py b/test/samples/MatMul/tmatmulk.py index 0984c574..21a1d6a1 100644 --- a/test/samples/MatMul/tmatmulk.py +++ b/test/samples/MatMul/tmatmulk.py @@ -29,7 +29,7 @@ def build( pto.register_dialect(ctx, load=True) module = builtin.ModuleOp() - module.attributes["pto.device-spec"] = StringAttr.get("Ascend910B1") + module.attributes["pto.target_arch"] = StringAttr.get("a3") # ---- element types ---- t_out = F32Type.get() diff --git a/test/samples/Matmul_transpose/Matmul_transpose-pto-ir.pto b/test/samples/Matmul_transpose/Matmul_transpose-pto-ir.pto index 65848d08..0aa22e11 100644 --- a/test/samples/Matmul_transpose/Matmul_transpose-pto-ir.pto +++ b/test/samples/Matmul_transpose/Matmul_transpose-pto-ir.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @RunTEXTRACT(%arg0: !pto.ptr, %arg1: !pto.ptr, %arg2: !pto.ptr, %arg3: i1, %arg4: i1) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/test/samples/Matmul_transpose/Matmul_transpose.py b/test/samples/Matmul_transpose/Matmul_transpose.py index adb843c1..14014211 100644 --- a/test/samples/Matmul_transpose/Matmul_transpose.py +++ b/test/samples/Matmul_transpose/Matmul_transpose.py @@ -41,7 +41,7 @@ def build( pto.register_dialect(ctx, load=True) module = builtin.ModuleOp() - module.attributes["pto.device-spec"] = StringAttr.get("Ascend910B1") + module.attributes["pto.target_arch"] = StringAttr.get("a3") t_out = F32Type.get() t_a = F32Type.get() diff --git a/test/samples/Sync/compensation_test.pto b/test/samples/Sync/compensation_test.pto index 36f02bd3..0373ece3 100644 --- a/test/samples/Sync/compensation_test.pto +++ b/test/samples/Sync/compensation_test.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @compensation_check(%arg0: !pto.ptr, %cond: i1) { %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index diff --git a/test/samples/Sync/matmul.pto b/test/samples/Sync/matmul.pto index 35e040f9..3663ddc1 100644 --- a/test/samples/Sync/matmul.pto +++ b/test/samples/Sync/matmul.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @RunTMATMULSplitK(%arg0: !pto.ptr, %arg1: !pto.ptr, %arg2: !pto.ptr, %arg3: !pto.ptr, %arg4: i1) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/test/samples/Sync/nested_loop_confliect.pto b/test/samples/Sync/nested_loop_confliect.pto index c35f9a67..3fafb1e1 100644 --- a/test/samples/Sync/nested_loop_confliect.pto +++ b/test/samples/Sync/nested_loop_confliect.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @nested_loop_sync(%arg0: !pto.ptr, %arg1: !pto.ptr) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/test/samples/Sync/rar_optimization_test.pto b/test/samples/Sync/rar_optimization_test.pto index 30585376..de4c7fb6 100644 --- a/test/samples/Sync/rar_optimization_test.pto +++ b/test/samples/Sync/rar_optimization_test.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @rar_hazard_check(%arg0: !pto.ptr, %arg1: !pto.ptr) { %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index diff --git a/test/samples/Sync/test_if_else_tile_result.pto b/test/samples/Sync/test_if_else_tile_result.pto index 04a05062..58eca83d 100644 --- a/test/samples/Sync/test_if_else_tile_result.pto +++ b/test/samples/Sync/test_if_else_tile_result.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @test_if_else_tile_result(%arg0: !pto.ptr, %arg1: !pto.ptr, %arg2: i32, %arg3: !pto.ptr) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/test/samples/Sync/tmatmulk_autosync.py b/test/samples/Sync/tmatmulk_autosync.py index 1937fb62..22237c79 100644 --- a/test/samples/Sync/tmatmulk_autosync.py +++ b/test/samples/Sync/tmatmulk_autosync.py @@ -32,7 +32,7 @@ def build( pto.register_dialect(ctx, load=True) module = builtin.ModuleOp() - module.attributes["pto.device-spec"] = StringAttr.get("Ascend910B1") + module.attributes["pto.target_arch"] = StringAttr.get("a3") # ---- element types ---- t_out = F32Type.get() diff --git a/test/samples/Sync/tmatmulk_autosync_a5.py b/test/samples/Sync/tmatmulk_autosync_a5.py index 3ef27e76..15c2b9ad 100644 --- a/test/samples/Sync/tmatmulk_autosync_a5.py +++ b/test/samples/Sync/tmatmulk_autosync_a5.py @@ -32,7 +32,7 @@ def build( pto.register_dialect(ctx, load=True) module = builtin.ModuleOp() - module.attributes["pto.device-spec"] = StringAttr.get("Ascend910B1") + module.attributes["pto.target_arch"] = StringAttr.get("a5") # ---- element types ---- t_out = F32Type.get() diff --git a/test/samples/runop.sh b/test/samples/runop.sh index ee50e931..8e26dd7e 100755 --- a/test/samples/runop.sh +++ b/test/samples/runop.sh @@ -187,13 +187,11 @@ process_one_dir() { esac # A5-only sample: buffer-id synchronization ops lower to CCEC get_buf/rls_buf - # intrinsics, which are not supported on older SoCs (e.g. Ascend910(A3)). - # Skip this python sample unless SOC_VERSION indicates an A5 target. + # intrinsics, which are not supported on A2/A3 (--pto-arch=a3). + # Skip this python sample unless --pto-arch indicates an A5 target. if [[ "$base" == "test_a5_buf_sync" ]]; then - soc="${SOC_VERSION:-}" - soc_lc="$(printf '%s' "${soc}" | tr '[:upper:]' '[:lower:]')" - if [[ "$soc_lc" != *"a5"* && "$soc_lc" != *"950"* ]]; then - echo -e "${A}(${base}.py)\tSKIP\trequires A5 (set SOC_VERSION to A5/950)" + if [[ "$(printf '%s' "$target_arch" | tr '[:upper:]' '[:lower:]')" != "a5" ]]; then + echo -e "${A}(${base}.py)\tSKIP\trequires A5 (set --pto-arch=a5)" continue fi fi diff --git a/tools/ptobc/testdata/matmul_static_singlecore.pto b/tools/ptobc/testdata/matmul_static_singlecore.pto index f4838f53..c7209285 100644 --- a/tools/ptobc/testdata/matmul_static_singlecore.pto +++ b/tools/ptobc/testdata/matmul_static_singlecore.pto @@ -1,4 +1,4 @@ -module attributes {"pto.device-spec" = "Ascend910B1"} { +module attributes {"pto.target_arch" = "a3"} { func.func @RunTMATMULSplitK(%arg0: !pto.ptr, %arg1: !pto.ptr, %arg2: !pto.ptr, %arg3: !pto.ptr, %arg4: i1) { pto.section.cube { %c0 = arith.constant 0 : index