diff --git a/src/xtc/cli/explore.py b/src/xtc/cli/explore.py index b411480d..ecf54125 100644 --- a/src/xtc/cli/explore.py +++ b/src/xtc/cli/explore.py @@ -99,6 +99,31 @@ def xtc_conv2d_graph( return gb.graph +def xtc_conv2d_nhwc_frsc_graph( + n: int, + h: int, + w: int, + f: int, + r: int, + s: int, + c: int, + SH: int, + SW: int, + ftype: str, + name: str = "matmul", +) -> Graph: + import xtc.graphs.xtc.op as O + + dtype = DTYPES_MAP[ftype] + a = O.tensor((n, h * SH + r - 1, w * SW + s - 1, c), dtype, name="A") + b = O.tensor((f, r, s, c), dtype, name="B") + with O.graph(name=name) as gb: + tp = O.transpose(b, axes=(1, 2, 3, 0), name="T") + O.conv2d(a, tp, stride=(SH, SW), name="O") + graph = gb.graph + return graph + + def tvm_impl(graph: Graph) -> tuple[Backend, str]: from xtc.backends.tvm import Backend @@ -210,12 +235,12 @@ def compile_one( ) ) if args.save_temps: - compile_args.update( - dict( - save_temps=True, - save_temps_dir=f"{args.save_temps_dir}/{ident}", - ) - ) + tmp_path = Path(args.save_temps_dir) / ident + tmp_path.mkdir(parents=True, exist_ok=True) + compile_args.update(dict(save_temps=True, save_temps_dir=str(tmp_path))) + with open(tmp_path / f"{ident}_graph.txt", "w") as outf: + print(graph, file=outf) + assert args.eval == "eval" compiler = impl.get_compiler(**compile_args) module = compiler.compile(schedule=schedule) @@ -252,7 +277,7 @@ def load_and_evaluate_sample( results, code, error_msg = evaluate() if code == 0: time = min(results) - logger.debug(" Evaluated: %s: %s: time: %.2f msecs", ident, in_x, time * 1000) + logger.debug(" Evaluated: %s: %s: time: %.3f msecs", ident, in_x, time * 1000) else: time = 0 logger.error("Error evaluating: %s: %s: %d: %s", ident, in_x, code, error_msg) @@ -524,7 +549,7 @@ def peak_time(args: NS) -> float: assert flops != 0, f"unable to evaluate machine flops for type {dtype}" dims_names = OPERATORS[args.operator]["dims"] dims_map = {k: v for k, v in zip(dims_names, args.dims)} flop = mulall([d for k, d 
in dims_map.items() if k.lower() == k]) time = flop / flops / args.threads return time @@ -634,22 +658,23 @@ def optimize(args: NS): } evaluate_sample(strategy, schedule, graph, args, callbacks=callbacks) for row in result_callback._rows: - in_x, time, peak, backend = row[-4:] + in_x, time, peak, backend = row[-4:] tqdm.write( - f"Schedule: {backend}: {in_x}: time: {time * 1000:.2f} msecs, peak perf: {peak * 100:.2f}%" + f"Schedule: {backend}: {in_x}: time: {time * 1000:.3f} msecs, peak perf: {peak * 100:.2f}%" ) else: search_some(strategy, graph, args) -def get_strategy(graph: Graph, args: NS) -> Strategy: - def strat_name(name: str) -> str: - alias = STRATEGIES_ALIASES.get(name) - if alias is None: - return name - return strat_name(alias) +def strategy_name(name: str) -> str: + alias = STRATEGIES_ALIASES.get(name) + if alias is None: + return name + return strategy_name(alias) - name = strat_name(args.strategy) + +def get_strategy(graph: Graph, args: NS) -> Strategy: + name = strategy_name(args.strategy) options = dict( threads=args.threads, max_unroll=args.max_unroll, @@ -716,6 +741,27 @@ def strat_name(name: str) -> str: }, "default_strategy": "tile_oo", }, + "conv2d_nhwc_frsc": { + "dims": ["n", "h", "w", "f", "r", "s", "c", "SH", "SW"], + "default_dims": [1, 112, 112, 64, 7, 7, 3, 2, 2], + "default_type": "f32", + "inputs": [ + ["n", "h * SH + r - 1", "w * SW + s - 1", "c"], + ["f", "r", "s", "c"], + ], + "outputs": [["n", "h", "w", "f"]], + "reference_impl": None, # defaults to graph evaluation + "operation": xtc_conv2d_nhwc_frsc_graph, + "backends": { + "mlir": { + "implementer": mlir_impl, + }, + "tvm": { + "implementer": tvm_impl, + }, + }, + "default_strategy": "tile_oo", + }, "relu": { "dims": ["i"], "default_dims": [512 * 1024], diff --git a/src/xtc/csrcs/runtimes/host/evaluate_perf.c b/src/xtc/csrcs/runtimes/host/evaluate_perf.c index d7f0e622..0ab33bed 100644 --- a/src/xtc/csrcs/runtimes/host/evaluate_perf.c +++ 
b/src/xtc/csrcs/runtimes/host/evaluate_perf.c @@ -30,10 +30,10 @@ typedef int (*packed_func_t)(PackedArg *, int *, int, PackedArg *, int *); #define NUMBER_FACTOR 2 -#define define_evaluateN(FUNC, ...) \ +#define define_evaluateN(FUNC, ...) { \ assert(repeat > 0); \ - assert(number > 0); \ + assert(number >= 0); \ assert(min_repeat_ms >= 0); \ \ int fd = -1; \ @@ -50,9 +50,13 @@ typedef int (*packed_func_t)(PackedArg *, int *, int, PackedArg *, int *); } \ open_perf_events(events_num, events, perf_fds); \ \ - mem_barrier(); \ - (void)func(__VA_ARGS__); \ - mem_barrier(); \ + if (number > 0) { \ + mem_barrier(); \ + (void)func(__VA_ARGS__); \ + mem_barrier(); \ + } else { \ + number = 1; \ + } \ \ for (int r = 0; r < repeat; r++) { \ double elapsed; \ diff --git a/src/xtc/graphs/xtc/builder.py b/src/xtc/graphs/xtc/builder.py index 1f8580c1..b157b1e4 100644 --- a/src/xtc/graphs/xtc/builder.py +++ b/src/xtc/graphs/xtc/builder.py @@ -6,6 +6,7 @@ from .graph import XTCGraph from .context import XTCGraphContext +from .expr import XTCExpr class graph_builder: @@ -25,3 +26,9 @@ def __exit__(self, *_: Any) -> None: def graph(self) -> XTCGraph: assert self._graph is not None, "can't get graph inside builder context" return self._graph + + def set_outputs(self, *outs: XTCExpr) -> None: + return XTCGraphContext.current.set_outputs(*outs) + + def set_inputs(self, *inps: XTCExpr) -> None: + return XTCGraphContext.current.set_inputs(*inps) diff --git a/src/xtc/graphs/xtc/context.py b/src/xtc/graphs/xtc/context.py index e7e26395..6572d5cf 100644 --- a/src/xtc/graphs/xtc/context.py +++ b/src/xtc/graphs/xtc/context.py @@ -35,6 +35,12 @@ def add_outputs(self, *outs: XTCExpr) -> None: def add_inputs(self, *inps: XTCExpr) -> None: self._inputs.extend(inps) + def set_outputs(self, *outs: XTCExpr) -> None: + self._outputs = list(outs) + + def set_inputs(self, *inps: XTCExpr) -> None: + self._inputs = list(inps) + def _infer_inputs(self, inps_seed: list[XTCExpr]) -> list[XTCExpr]: defs = 
set(self._exprs) uses = [] @@ -42,7 +48,10 @@ def _infer_inputs(self, inps_seed: list[XTCExpr]) -> list[XTCExpr]: if isinstance(expr, XTCOpExpr): for arg in expr.args: uses.append(arg) - inputs = inps_seed + [use for use in uses if use not in defs] + inputs = list({use._idx: use for use in uses if use not in defs}.values()) + # when no order specified, sort by expr id + inputs = sorted(inputs, key=lambda x: x._idx) + inputs = inps_seed + inputs inputs = list({expr._idx: expr for expr in inputs}.values()) return inputs @@ -113,11 +122,5 @@ def append(self, expr: XTCExpr, name: str | None = None) -> XTCExpr: scope.add_expr(expr, name) return expr - def outputs(self, *outs: XTCExpr) -> None: - return self.current.add_outputs(*outs) - - def inputs(self, *inps: XTCExpr) -> None: - return self.current.add_inputs(*inps) - XTCGraphContext = XTCGraphScopes() diff --git a/src/xtc/graphs/xtc/expr.py b/src/xtc/graphs/xtc/expr.py index d177c106..98fc89bc 100644 --- a/src/xtc/graphs/xtc/expr.py +++ b/src/xtc/graphs/xtc/expr.py @@ -14,6 +14,7 @@ from .operators import ( XTCOperator, XTCOperTensor, + XTCOperCompute, XTCOperMatmul, XTCOperRelu, XTCOperConv2D, @@ -76,6 +77,10 @@ def forward_types( @abstractmethod def forward(self, inputs: Sequence[Tensor]) -> Sequence[XTCTensor]: ... + @property + @abstractmethod + def op(self) -> XTCOperator: ... + @property @abstractmethod def op_name(self) -> str: ... 
@@ -150,6 +155,11 @@ def type(self) -> XTCTensorType: assert isinstance(self._value, XTCTensor) return self._value.type + @property + @override + def op(self) -> XTCOperTensor: + return self._op + @property @override def op_name(self) -> str: @@ -180,11 +190,16 @@ def __str__(self) -> str: class XTCOpExpr(XTCExpr): - def __init__(self, op: XTCOperator, args: ArgumentsType) -> None: + def __init__(self, op: XTCOperCompute, args: ArgumentsType) -> None: super().__init__() self._op = op self._args = args + @property + @override + def op(self) -> XTCOperCompute: + return self._op + @property @override def op_name(self) -> str: diff --git a/src/xtc/graphs/xtc/node.py b/src/xtc/graphs/xtc/node.py index 97491171..caf2e9ec 100644 --- a/src/xtc/graphs/xtc/node.py +++ b/src/xtc/graphs/xtc/node.py @@ -136,9 +136,7 @@ def _operation(self) -> XTCOperation: assert self._outputs_types is not None inputs_types = self._inputs_types outputs_types = self._outputs_types - assert isinstance(self._expr, XTCTensorExpr) or isinstance( - self._expr, XTCOpExpr - ) + assert isinstance(self._expr, XTCOpExpr) return self._expr._op.get_operation( inps_types=inputs_types, outs_types=outputs_types, diff --git a/src/xtc/graphs/xtc/operation.py b/src/xtc/graphs/xtc/operation.py index 574cc7ae..6eba25f2 100644 --- a/src/xtc/graphs/xtc/operation.py +++ b/src/xtc/graphs/xtc/operation.py @@ -5,10 +5,12 @@ from typing_extensions import override from collections.abc import Mapping, Sequence from typing import Any +import json +from xtc.graphs.xtc.data import XTCTensorType from xtc.itf.graph import Operation from xtc.itf.graph.operation import AccessesMaps -from xtc.itf.data import TensorType +from xtc.utils.math import mulall class XTCOperation(Operation): @@ -16,8 +18,8 @@ def __init__( self, name: str, attrs: Mapping[str, Any], - inputs_types: Sequence[TensorType], - outputs_types: Sequence[TensorType], + inputs_types: Sequence[XTCTensorType], + outputs_types: Sequence[XTCTensorType], dims: 
Mapping[str, int | str], kinds: Sequence[str], inps_maps: Sequence[Sequence[str]], @@ -48,12 +50,12 @@ def attrs(self) -> Mapping[str, Any]: @property @override - def inputs_types(self) -> Sequence[TensorType]: + def inputs_types(self) -> Sequence[XTCTensorType]: return self._inputs_types @property @override - def outputs_types(self) -> Sequence[TensorType]: + def outputs_types(self) -> Sequence[XTCTensorType]: return self._outputs_types @property @@ -69,3 +71,30 @@ def dims_kind(self, kind: str) -> Sequence[str]: @override def accesses_maps(self) -> AccessesMaps: return self._maps + + @property + @override + def ops_count(self) -> int: + # Assume single output, hence estimate + # ops as the product of all dimensions + # in the iteration space + shape = self._outputs_types[0].constant_shape + ops_count = mulall(list(shape)) + return ops_count + + @property + @override + def ops_dtype(self) -> str: + # Assume single output, hence estimate + # dtype as the first output dtype + return self._outputs_types[0].constant_dtype + + @property + @override + def signature(self) -> list[Any]: + # Normalize json + return json.loads( + json.dumps( + [self.name, list(self.dims.values()), self.ops_dtype, dict(self.attrs)] + ) + ) diff --git a/src/xtc/graphs/xtc/operators.py b/src/xtc/graphs/xtc/operators.py index f4798cc4..3e6ebd0e 100644 --- a/src/xtc/graphs/xtc/operators.py +++ b/src/xtc/graphs/xtc/operators.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2024-2026 The XTC Project Authors # +from abc import abstractmethod from typing_extensions import override from typing import TypeAlias, cast, Any from types import SimpleNamespace as NS @@ -12,6 +13,7 @@ from xtc.itf.operator import Operator from xtc.itf.data import Tensor, TensorType +from xtc.utils.math import mulall from .data import XTCTensor, XTCTensorType from .operation import XTCOperation @@ -53,6 +55,26 @@ def forward_types( def forward(self, inputs: Sequence[Tensor]) -> Sequence[XTCTensor]: 
return [cast(XTCTensor, inp) for inp in inputs] + @classmethod + def get_op_signature(cls, name: str, *spec: Any, **kwspec: Any) -> list[Any]: + map = dict( + matmul=XTCOperMatmul, + conv2d=XTCOperConv2D, + relu=XTCOperRelu, + pad2d=XTCOperPad2D, + reshape=XTCOperReshape, + transpose=XTCOperTranspose, + ) + oper_cls = cast(XTCOperCompute, map[name]) + return oper_cls.get_signature(*spec, **kwspec) + + +class XTCOperTensor(XTCOperator): + def __init__(self) -> None: + super().__init__("tensor") + + +class XTCOperCompute(XTCOperator): def get_operation( self, inps_types: Sequence[XTCTensorType], @@ -86,32 +108,12 @@ def _get_operation( outs_maps=outs_maps, ) - -class XTCOperTensor(XTCOperator): - def __init__(self) -> None: - super().__init__("tensor") - - @override - def get_operation( - self, - inps_types: Sequence[XTCTensorType], - outs_types: Sequence[XTCTensorType], - ) -> XTCOperation: - inputs_types = [inp.constant for inp in inps_types] - outputs_types = [out.constant for out in outs_types] - return XTCOperation( - name=self.name, - attrs=self.attrs.__dict__, - inputs_types=tuple(inputs_types), - outputs_types=tuple(outputs_types), - dims={"i": outputs_types[0].size}, - kinds=("P",), - inps_maps=(), - outs_maps=(("i",)), - ) + @classmethod + @abstractmethod + def get_signature(cls, *spec: Any, **kwspec: Any) -> list[Any]: ... 
-class XTCOperMatmul(XTCOperator): +class XTCOperMatmul(XTCOperCompute): def __init__(self) -> None: super().__init__("matmul") @@ -121,10 +123,12 @@ def get_operation( inps_types: Sequence[XTCTensorType], outs_types: Sequence[XTCTensorType], ) -> XTCOperation: - inp0_shape = inps_types[0].constant_shape - inp1_shape = inps_types[1].constant_shape - i, k = inp0_shape - bk, j = inp1_shape + Ashape = inps_types[0].constant_shape + Bshape = inps_types[1].constant_shape + Ashape = (Ashape[0], mulall(list(Ashape[1:]))) + Bshape = (mulall(list(Bshape[:-1])), Bshape[-1]) + i, k = Ashape + bk, j = Bshape assert k == bk return self._get_operation( inps_types, @@ -146,10 +150,14 @@ def forward_types( assert len(inputs_types) == 2 assert inputs_types[0].shape is not None assert inputs_types[1].shape is not None - assert len(inputs_types[0].shape) == 2 - assert len(inputs_types[1].shape) == 2 - i, k = cast(XTCTensorType, inputs_types[0]).constant_shape - bk, j = cast(XTCTensorType, inputs_types[1]).constant_shape + assert len(inputs_types[0].shape) >= 2 + assert len(inputs_types[1].shape) >= 2 + Ashape = cast(XTCTensorType, inputs_types[0]).constant_shape + Bshape = cast(XTCTensorType, inputs_types[1]).constant_shape + Ashape = (Ashape[0], mulall(list(Ashape[1:]))) + Bshape = (mulall(list(Bshape[:-1])), Bshape[-1]) + i, k = Ashape + bk, j = Bshape assert k == bk, ( f"incompatible dimension k for matmul inputs shapes: ({i}, {k}) ({bk}, {j})" ) @@ -162,15 +170,25 @@ def forward_types( @override def forward(self, inputs: Sequence[Tensor]) -> Sequence[XTCTensor]: - matmul = XTCTensor(np.matmul(inputs[0].numpy(), inputs[1].numpy())) + A = inputs[0].numpy() + B = inputs[1].numpy() + A = A.reshape((A.shape[0], -1)) + B = B.reshape((-1, B.shape[-1])) + matmul = XTCTensor(np.matmul(A, B)) expected_type = self.forward_types([inp.type for inp in inputs])[0] assert matmul.type == expected_type, ( f"output type mismatch expect: {matmul.type} != {expected_type}" ) return [matmul] + 
@override + @classmethod + def get_signature(cls, *spec: Any, **kwspec: Any) -> list[Any]: + i, j, k, dtype = spec + return ["matmul", [i, j, k], dtype, {}] + -class XTCOperRelu(XTCOperator): +class XTCOperRelu(XTCOperCompute): def __init__(self, **attrs: XTCOperatorAttr) -> None: super().__init__("relu", **attrs) self._threshold = 0 if "threshold" not in attrs else self.attrs.threshold @@ -197,8 +215,22 @@ def forward(self, inputs: Sequence[Tensor]) -> Sequence[XTCTensor]: relu = XTCTensor(np.maximum(inputs[0].numpy(), self._threshold)) return [relu] + @override + @classmethod + def get_signature(cls, *spec: Any, **kwspec: Any) -> list[Any]: + i, dtype = spec + threshold = float(kwspec["threshold"]) + return [ + "relu", + [i], + dtype, + dict( + threshold=threshold, + ), + ] + -class XTCOperConv2D(XTCOperator): +class XTCOperConv2D(XTCOperCompute): def __init__(self, **attrs: XTCOperatorAttr) -> None: super().__init__("conv2d", **attrs) if "stride" not in attrs: @@ -294,8 +326,22 @@ def forward(self, inputs: Sequence[Tensor]) -> Sequence[XTCTensor]: ) return [conv] + @override + @classmethod + def get_signature(cls, *spec: Any, **kwspec: Any) -> list[Any]: + n, h, w, f, r, s, c, dtype = spec + stride = list(kwspec["stride"]) + return [ + "conv2d", + [n, h, w, f, r, s, c], + dtype, + dict( + stride=stride, + ), + ] + -class XTCOperPad2D(XTCOperator): +class XTCOperPad2D(XTCOperCompute): def __init__(self, **attrs: XTCOperatorAttr) -> None: padding = attrs.get("padding", (0, 0, 0, 0)) if isinstance(padding, int): @@ -375,8 +421,22 @@ def forward(self, inputs: Sequence[Tensor]) -> Sequence[XTCTensor]: ) return [padded] + @override + @classmethod + def get_signature(cls, *spec: Any, **kwspec: Any) -> list[Any]: + b, h, w, c, dtype = spec + padding = list(kwspec["padding"]) + return [ + "pad2d", + [b, h, w, c], + dtype, + dict( + padding=padding, + ), + ] + -class XTCOperReshape(XTCOperator): +class XTCOperReshape(XTCOperCompute): def __init__(self, **attrs: 
XTCOperatorAttr) -> None: super().__init__("reshape", **attrs) if "shape" not in attrs: @@ -428,8 +488,22 @@ def forward(self, inputs: Sequence[Tensor]) -> Sequence[XTCTensor]: ) return [reshaped] + @override + @classmethod + def get_signature(cls, *spec: Any, **kwspec: Any) -> list[Any]: + i, dtype = spec + shape = list(kwspec["shape"]) + return [ + "reshape", + [i], + dtype, + dict( + shape=shape, + ), + ] + -class XTCOperTranspose(XTCOperator): +class XTCOperTranspose(XTCOperCompute): def __init__(self, **attrs: XTCOperatorAttr) -> None: axes = attrs.get("axes", ()) super().__init__("transpose", axes=axes) @@ -478,3 +552,17 @@ def get_operation( inps_maps=(("i",)), outs_maps=(("i",)), # TODO: invalid ) + + @override + @classmethod + def get_signature(cls, *spec: Any, **kwspec: Any) -> list[Any]: + i, dtype = spec + axes = list(kwspec["axes"]) + return [ + "transpose", + [i], + dtype, + dict( + axes=axes, + ), + ] diff --git a/src/xtc/itf/graph/operation.py b/src/xtc/itf/graph/operation.py index a9d98345..c9e3bd05 100644 --- a/src/xtc/itf/graph/operation.py +++ b/src/xtc/itf/graph/operation.py @@ -113,3 +113,45 @@ def accesses_maps(self) -> AccessesMaps: Accesses map for this operation """ ... + + @property + @abstractmethod + def ops_count(self) -> int: + """Returns an estimate of the operation count for + the operation. Used for operation weight estimates + or peak performance estimations. + + For instance on a matmul with dimensions i, j, k, + this returns i*j*k. + + Returns: + Estimated ops count + """ + ... + + @property + @abstractmethod + def ops_dtype(self) -> str: + """Returns the datatype string for the estimated + ops count. + + This is generally the dtype of the output data. + + Returns: + Data type string of the operation + """ + ... + + @property + @abstractmethod + def signature(self) -> list[Any]: + """Returns an unique signature for the operation among + all possible operations of the particular instances. 
+ + The signature should be a combination of base types and + is used for logs databases. + + Returns: + List of fields for unique identification of the operation + """ + ... diff --git a/src/xtc/runtimes/host/evaluator.py b/src/xtc/runtimes/host/evaluator.py index 78663844..e23f4194 100644 --- a/src/xtc/runtimes/host/evaluator.py +++ b/src/xtc/runtimes/host/evaluator.py @@ -109,7 +109,7 @@ def __init__( pmu_counters: list[str] = [], ) -> None: assert repeat > 0 - assert number > 0 + assert number >= 0 # 0 means no warmup assert min_repeat_ms >= 0 self.repeat = repeat self.number = number