hw-native-sys · Hzfengsy · Feb 24, 2026 · Feb 13, 2026 · Feb 13, 2026 · Feb 24, 2026
diff --git a/python/pypto/ir/op/block_ops.py b/python/pypto/ir/op/block_ops.py
@@ -279,6 +279,20 @@ def full(
     return _ir_core.create_op_call("block.full", [shape_tuple, value_expr], kwargs, actual_span)
 
 
+def fillpad(tile: Expr, span: Span | None = None) -> Call:
+    """Fill tile with padding for remaining elements.
+
+    Args:
+        tile: Input tile (TileType)
+        span: Optional source span for debugging (auto-captured if not provided)
+
+    Returns:
+        Call expression that returns the filled and padded tile
+    """
+    actual_span = _get_span_or_capture(span)
+    return _ir_core.create_op_call("block.fillpad", [tile], {}, actual_span)
+
+
 # ============================================================================
 # Element-wise Operations
 # ============================================================================
@@ -583,7 +597,7 @@ def cast(
         raise ValueError(f"Invalid rounding mode '{mode}'. Expected one of {list(modes.keys())}.")
 
     actual_span = _get_span_or_capture(span)
-    kwargs: dict[str, Any] = {"target_dtype": target_type, "mode": mode_val}
+    kwargs: dict[str, Any] = {"target_type": target_type, "mode": mode_val}
     return _ir_core.create_op_call("block.cast", [tile], kwargs, actual_span)
 
 

diff --git a/python/pypto/language/op/block_ops.py b/python/pypto/language/op/block_ops.py
@@ -24,6 +24,7 @@
     "move",
     "ub_copy",
     "full",
+    "fillpad",
     "get_block_idx",
     "add",
     "sub",
@@ -214,7 +215,6 @@ def full(shape: list[int], dtype: DataType, value: int | float) -> Tile:
         shape: Shape of the tile
         dtype: Data type of the tile
         value: filling scalar
-        span: Optional source span for debugging (auto-captured if not provided)
 
     Returns:
         Tile wrapping the full operation
@@ -223,6 +223,19 @@ def full(shape: list[int], dtype: DataType, value: int | float) -> Tile:
     return Tile(expr=call_expr)
 
 
+def fillpad(tile: Tile) -> Tile:
+    """Fill tile with padding for remaining elements.
+
+    Args:
+        tile: Input tile
+
+    Returns:
+        Tile wrapping the fillpad operation
+    """
+    call_expr = _ir_ops.fillpad(tile.unwrap())
+    return Tile(expr=call_expr)
+
+
 def get_block_idx() -> Scalar:
     """Get the current block index.
 

diff --git a/src/backend/910B_CCE/backend_910b_cce_ops.cpp b/src/backend/910B_CCE/backend_910b_cce_ops.cpp
@@ -673,6 +673,12 @@ REGISTER_BACKEND_OP(Backend910B_CCE, "block.row_expand_add")
       return MakeBinaryElementwiseCodegenCCE("TROWEXPANDADD", op, codegen);
     });
 
+REGISTER_BACKEND_OP(Backend910B_CCE, "block.fillpad")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeUnaryCodegenCCE("TFILLPAD", op, codegen);  // unary: block.fillpad takes one tile
+    });
+
 REGISTER_BACKEND_OP(Backend910B_CCE, "block.col_expand")
     .set_pipe(ir::PipeType::V)
     .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {

diff --git a/src/backend/910B_PTO/backend_910b_pto_ops.cpp b/src/backend/910B_PTO/backend_910b_pto_ops.cpp
@@ -112,7 +112,19 @@ static std::string MakeTernaryTileTileCodegenPTO(const std::string& pto_op_name,
   return "";
 }
 
-// Helper function for binary Tile-Scalar operations
+// Helper function for full op. Only the fill value (args_[1]) is emitted as the `ins`
+// operand; the shape argument (args_[0]) is not referenced here.
+static std::string MakeFullCodegenPTO(const std::string& pto_op_name, const CallPtr& op,
+                                      codegen::CodegenBase& codegen_base) {
+  auto& codegen = dynamic_cast<codegen::PTOCodegen&>(codegen_base);
+  CHECK(op->args_.size() == 2) << "full op requires 2 arguments, got " << op->args_.size();
+  std::string scalar = codegen.GetExprAsCode(op->args_[1]);
+  std::string dst = codegen.GetCurrentResultTarget();
+  codegen.Emit(pto_op_name + " " + "ins(" + scalar + ") outs(" + dst + ")");
+  return "";
+}
+
+// Helper function for Binary Tile-Scalar operations
 static std::string MakeBinaryTileScalarCodegenPTO(const std::string& pto_op_name, const CallPtr& op,
                                                   codegen::CodegenBase& codegen_base) {
   auto& codegen = dynamic_cast<codegen::PTOCodegen&>(codegen_base);
@@ -175,6 +187,33 @@ static std::string MakeTernaryGEMVCodegenPTO(const std::string& pto_op_name, con
   return "";
 }
 
+// Codegen helper for padding ops: checks for a single operand, then emits the PTO instruction
+static std::string MakeFillPadCodegenPTO(const std::string& pto_op_name, const CallPtr& op,
+                                         codegen::CodegenBase& codegen_base) {
+  auto& pto_codegen = dynamic_cast<codegen::PTOCodegen&>(codegen_base);
+  CHECK(op->args_.size() == 1) << "Fill pad op requires 1 argument.";
+  pto_codegen.Emit(pto_op_name + " " + GenerateInsOutsClause(op, pto_codegen));
+  return "";
+}
+
+// Codegen helper for three-operand data movement/layout ops: checks arity, then emits the op
+static std::string MakeTernaryDataMoveLayoutCodegenPTO(const std::string& pto_op_name, const CallPtr& op,
+                                                       codegen::CodegenBase& codegen_base) {
+  auto& pto_codegen = dynamic_cast<codegen::PTOCodegen&>(codegen_base);
+  CHECK(op->args_.size() == 3) << "Ternary move/layout op requires 3 arguments.";
+  pto_codegen.Emit(pto_op_name + " " + GenerateInsOutsClause(op, pto_codegen));
+  return "";
+}
+
+// Codegen helper for two-operand axis reduction/expansion ops: checks arity, then emits the op
+static std::string MakeBinaryAxisCodegenPTO(const std::string& pto_op_name, const CallPtr& op,
+                                            codegen::CodegenBase& codegen_base) {
+  auto& pto_codegen = dynamic_cast<codegen::PTOCodegen&>(codegen_base);
+  CHECK(op->args_.size() == 2) << "Binary Axis op requires 2 arguments.";
+  pto_codegen.Emit(pto_op_name + " " + GenerateInsOutsClause(op, pto_codegen));
+  return "";
+}
+
 // block.load: emit pto.subview + pto.tload (same format as original IR layer codegen)
 static std::string MakeBlockLoadCodegenPTO(const CallPtr& op, codegen::CodegenBase& codegen_base) {
   auto& codegen = dynamic_cast<codegen::PTOCodegen&>(codegen_base);
@@ -524,7 +563,13 @@ REGISTER_BACKEND_OP(Backend910B_PTO, "block.mins")
       return MakeBinaryTileScalarCodegenPTO("pto.tmins", op, codegen);
     });
 
-// Not Implemented: tlrelu tcmps taddsc tsubsc tsels texpands
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.full")  // implemented with pto.texpands
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeFullCodegenPTO("pto.texpands", op, codegen);
+    });
+
+// Not Implemented: tlrelu tcmps taddsc tsubsc tsels
 
 // ============================================================================
 // Matrix Multiplication Operations
@@ -584,6 +629,66 @@ REGISTER_BACKEND_OP(Backend910B_PTO, "block.gemv_bias")
       return MakeTernaryGEMVCodegenPTO("pto.tgemv.bias", op, codegen);
     });
 
+// ============================================================================
+// Data Movement/Layout Operations (emitted via MakeTernaryDataMoveLayoutCodegenPTO)
+// ============================================================================
+
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.transpose")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeTernaryDataMoveLayoutCodegenPTO("pto.ttrans", op, codegen);
+    });
+
+// ============================================================================
+// Axis Reduction/Expansion Operations (all emitted via MakeBinaryAxisCodegenPTO)
+// ============================================================================
+
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.row_sum")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeBinaryAxisCodegenPTO("pto.trowsum", op, codegen);
+    });
+
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.row_max")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeBinaryAxisCodegenPTO("pto.trowmax", op, codegen);
+    });
+
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.row_min")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeBinaryAxisCodegenPTO("pto.trowmin", op, codegen);
+    });
+
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.row_expand_div")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeBinaryAxisCodegenPTO("pto.trowexpanddiv", op, codegen);
+    });
+
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.row_expand_mul")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeBinaryAxisCodegenPTO("pto.trowexpandmul", op, codegen);
+    });
+
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.row_expand_sub")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeBinaryAxisCodegenPTO("pto.trowexpandsub", op, codegen);
+    });
+
+// ============================================================================
+// Padding Operations (block.fillpad is emitted as pto.tfillpad)
+// ============================================================================
+
+REGISTER_BACKEND_OP(Backend910B_PTO, "block.fillpad")
+    .set_pipe(ir::PipeType::V)
+    .f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
+      return MakeFillPadCodegenPTO("pto.tfillpad", op, codegen);
+    });
+
 // ============================================================================
 // Memory Operations
 // ============================================================================

diff --git a/src/ir/op/block_ops/elementwise.cpp b/src/ir/op/block_ops/elementwise.cpp
@@ -268,5 +268,23 @@ REGISTER_OP("block.cmps")
       return DeduceBlockCmpType(args, kwargs, "block.cmps", true);
     });
 
+REGISTER_OP("block.fillpad")
+    .set_op_category("BlockOp")
+    .set_description("Fill destination tile with source tile data and pad remaining elements")
+    .add_argument("tile", "Input tile (TileType)")
+    .f_deduce_type([](const std::vector<ExprPtr>& args,
+                      const std::vector<std::pair<std::string, std::any>>& kwargs) {
+      CHECK(args.size() == 1) << "The operator block.fillpad requires exactly 1 argument, but got "
+                              << args.size();
+
+      // The sole operand has to be a tile
+      auto src_type = As<TileType>(args[0]->GetType());
+      CHECK(src_type) << "The operator block.fillpad requires first argument to be a TileType, but got "
+                      << args[0]->GetType()->TypeName();
+
+      // Result is a new TileType carrying the input's shape and dtype
+      return std::make_shared<TileType>(src_type->shape_, src_type->dtype_);
+    });
+
 }  // namespace ir
 }  // namespace pypto