From e0043a04d426033422ed4deed3790f9bef7bdb12 Mon Sep 17 00:00:00 2001 From: Carsten Uphoff Date: Fri, 24 Jul 2020 09:56:04 +0200 Subject: [PATCH 1/4] Fix #15 --- yateto/__init__.py | 1 + yateto/memory.py | 41 ++++++++++++++++++++++++++++++++++++----- yateto/type.py | 10 +++++----- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/yateto/__init__.py b/yateto/__init__.py index 0b3dd50..48c3f0b 100644 --- a/yateto/__init__.py +++ b/yateto/__init__.py @@ -2,3 +2,4 @@ from .generator import NamespacedGenerator, Generator, simpleParameterSpace, parameterSpaceFromRanges from .arch import useArchitectureIdentifiedBy from .gemm_configuration import * +from .memory import * diff --git a/yateto/memory.py b/yateto/memory.py index 4cc8acf..837776a 100644 --- a/yateto/memory.py +++ b/yateto/memory.py @@ -1,10 +1,22 @@ from .ast.indices import BoundingBox, Range import copy +from enum import Enum import itertools import warnings import numpy as np from abc import ABC, abstractmethod +class Alignment(Enum): + """Alignment mode. + + Automatic: Assume aligned memory if stride is divisible by vector width. + Aligned: Pad the leading dimension with zeros such that stride is divisible by memory width. + Unaligned: Always assume unaligned memory access. + """ + Automatic = 0, + Aligned = 1, + Unaligned = 2 + class MemoryLayout(ABC): def __init__(self, shape): self._shape = shape @@ -55,7 +67,15 @@ class DenseMemoryLayout(MemoryLayout): def setAlignmentArch(cls, arch): cls.ALIGNMENT_ARCH = arch - def __init__(self, shape, boundingBox=None, stride=None, alignStride=False): + def __init__(self, shape, boundingBox=None, stride=None, alignStride=Alignment.Automatic): + """Construct DenseMemoryLayout. + + :param shape: tensor shape (tuple of integers) + :param boundingBox: Non-zero BoundingBox, covers complete tensor if None + :param stride: Stride of the leading dimension, computed automatically if None + :param alignStride: Alignment mode. 
Passing False is equal to Alignment.Automatic and passing + True is equal to Alignment.Aligned. + """ super().__init__(shape) if boundingBox: @@ -63,8 +83,16 @@ def __init__(self, shape, boundingBox=None, stride=None, alignStride=False): else: self._bbox = BoundingBox([Range(0, s) for s in self._shape]) + if alignStride == True: + self._alignment = Alignment.Aligned + elif alignStride == False: + self._alignment = Alignment.Automatic + elif isinstance(alignStride, Alignment): + self._alignment = alignStride + else: + raise ValueError("Unknown type for option alignStride") self._range0 = None - if alignStride: + if self._alignment == Alignment.Aligned: self._alignBB() if stride: @@ -86,8 +114,11 @@ def _alignBB(self): else: warnings.warn('Set architecture with DenseMemoryLayout.setAlignmentArch(arch) if you want to use the align stride feature.', UserWarning) + def alignment(self): + return self._alignment + def alignedStride(self): - if self.ALIGNMENT_ARCH is None: + if self.ALIGNMENT_ARCH is None or self._alignment == Alignment.Unaligned: return False offsetOk = self.ALIGNMENT_ARCH.checkAlignment(self._bbox[0].start) ldOk = self._stride[0] == 1 and (len(self._stride) == 1 or self.ALIGNMENT_ARCH.checkAlignment(self._stride[1])) @@ -99,7 +130,7 @@ def mayVectorizeDim(self, dim): return self.ALIGNMENT_ARCH.checkAlignment(self._bbox[dim].size()) @classmethod - def fromSpp(cls, spp, alignStride=False): + def fromSpp(cls, spp, alignStride=Alignment.Automatic): bbox = BoundingBox.fromSpp(spp) return cls(spp.shape, bbox, alignStride=alignStride) @@ -111,7 +142,7 @@ def permuted(self, permutation): originalBB = BoundingBox([self._range0] + self._bbox[1:]) if self._range0 else self._bbox newBB = BoundingBox([copy.copy(originalBB[p]) for p in permutation]) - return DenseMemoryLayout(newShape, newBB, alignStride=self._range0 is not None) + return DenseMemoryLayout(newShape, newBB, alignStride=self._alignment) def address(self, entry): assert entry in self._bbox diff --git 
a/yateto/type.py b/yateto/type.py index d79ba06..64b7852 100644 --- a/yateto/type.py +++ b/yateto/type.py @@ -1,7 +1,7 @@ import re from .ast.node import Node, IndexedTensor from numpy import ndarray, zeros, float64 -from .memory import DenseMemoryLayout +from .memory import DenseMemoryLayout, Alignment from . import aspp class AbstractType(object): @@ -30,8 +30,8 @@ class Tensor(AbstractType): GROUP_INDICES = r'\(({0}(,{0})*)\)'.format(GROUP_INDEX) VALID_NAME = r'^{}({})?$'.format(BASE_NAME, GROUP_INDICES) - def __init__(self, name, shape, spp=None, memoryLayoutClass=DenseMemoryLayout, alignStride=False, - namespace=None): + def __init__(self, name, shape, spp=None, memoryLayoutClass=DenseMemoryLayout, + alignStride=Alignment.Automatic, namespace=None): if not isinstance(shape, tuple): raise ValueError('shape must be a tuple') @@ -72,7 +72,7 @@ def __init__(self, name, shape, spp=None, memoryLayoutClass=DenseMemoryLayout, a self.setMemoryLayout(memoryLayoutClass, alignStride) - def setMemoryLayout(self, memoryLayoutClass, alignStride=False): + def setMemoryLayout(self, memoryLayoutClass, alignStride=Alignment.Automatic): self._memoryLayout = memoryLayoutClass.fromSpp(self._groupSpp, alignStride=alignStride) def _setSparsityPattern(self, spp, setOnlyGroupSpp=False): @@ -85,7 +85,7 @@ def _setSparsityPattern(self, spp, setOnlyGroupSpp=False): def setGroupSpp(self, spp): self._setSparsityPattern(spp, setOnlyGroupSpp=True) - self.setMemoryLayout(self._memoryLayout.__class__, alignStride=self._memoryLayout.alignedStride()) + self.setMemoryLayout(self._memoryLayout.__class__, alignStride=self._memoryLayout.alignment()) def __getitem__(self, indexNames): return IndexedTensor(self, indexNames) From 2f89bf46543050e2ce188f64e7d89caa619d3835 Mon Sep 17 00:00:00 2001 From: Carsten Uphoff Date: Wed, 3 Mar 2021 17:51:14 +0100 Subject: [PATCH 2/4] Add alignment compatibility check in controlflowgraph; fix eigen align flags --- yateto/codegen/gemm/gemmgen.py | 5 +++-- 
yateto/controlflow/graph.py | 8 ++++---- yateto/gemm_configuration.py | 12 ++++++------ yateto/memory.py | 16 ++++++++++------ 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/yateto/codegen/gemm/gemmgen.py b/yateto/codegen/gemm/gemmgen.py index eb3f7a0..b155881 100644 --- a/yateto/codegen/gemm/gemmgen.py +++ b/yateto/codegen/gemm/gemmgen.py @@ -89,9 +89,10 @@ def generate(self, cpp, routineCache): cpp( self._gemm_cfg.call(d.transA, d.transB, m.size(), n.size(), k.size(), - d.alpha, self._pointer(d.leftTerm, (m.start, k.start), d.transA), ldA, + d.alpha, self._pointer(d.leftTerm, (m.start, k.start), d.transA), + ldA, d.alignedA, self._pointer(d.rightTerm, (k.start, n.start), d.transB), ldB, - d.beta, self._pointer(d.result, (m.start, n.start), False), ldC)) + d.beta, self._pointer(d.result, (m.start, n.start), False), ldC, d.alignedC)) elif isinstance(self._gemm_cfg, GemmForge): diff --git a/yateto/controlflow/graph.py b/yateto/controlflow/graph.py index 9990cb6..e99a655 100644 --- a/yateto/controlflow/graph.py +++ b/yateto/controlflow/graph.py @@ -13,13 +13,13 @@ def variables(self): return {self} def maySubstitute(self, when, by): - return self.substituted(when, by).memoryLayout().isCompatible(self.eqspp()) + return self.substituted(when, by).memoryLayout().isCompatible(self.memoryLayout(), self.eqspp()) def substituted(self, when, by, memoryLayout=None): return by if self == when else self def resultCompatible(self, result): - return result.memoryLayout().isCompatible(self.eqspp()) + return result.memoryLayout().isCompatible(self.memoryLayout(), self.eqspp()) def isGlobal(self): return self.tensor is not None @@ -71,7 +71,7 @@ def variableList(self): def maySubstitute(self, when, by): layouts = [var.substituted(when, by).memoryLayout() for var in self._variables] - c1 = all(layouts[i].isCompatible(var.eqspp()) for i,var in enumerate(self._variables)) + c1 = all(layouts[i].isCompatible(var.memoryLayout(), var.eqspp()) for i,var in 
enumerate(self._variables)) c2 = self.node.argumentsCompatible(layouts) return c1 and c2 @@ -79,7 +79,7 @@ def substituted(self, when, by, memoryLayout): return Expression(self.node, memoryLayout, [var.substituted(when, by) for var in self._variables]) def resultCompatible(self, result): - c1 = result.memoryLayout().isCompatible(self.eqspp()) + c1 = result.memoryLayout().isCompatible(self.memoryLayout(), self.eqspp()) c2 = self.node.resultCompatible(result.memoryLayout()) return c1 and c2 diff --git a/yateto/gemm_configuration.py b/yateto/gemm_configuration.py index c02b285..c4e2fc4 100644 --- a/yateto/gemm_configuration.py +++ b/yateto/gemm_configuration.py @@ -38,7 +38,7 @@ def supported(self, m, n, k, sparseA, sparseB, transA, transB, alpha, def bool2Trans(self, trans): return 'Cblas{}Trans'.format('' if trans else 'No') - def call(self, transA, transB, M, N, K, alpha, A, ldA, B, ldB, beta, C, ldC): + def call(self, transA, transB, M, N, K, alpha, A, ldA, alignedA, B, ldB, beta, C, ldC, alignedC): parameters = [ 'CblasColMajor', self.bool2Trans(transA), @@ -65,7 +65,7 @@ def __init__(self, arch): def bool2Trans(self, trans): return 'BLIS{}TRANSPOSE'.format('_' if trans else '_NO_') - def call(self, transA, transB, M, N, K, alpha, A, ldA, B, ldB, beta, C, ldC): + def call(self, transA, transB, M, N, K, alpha, A, ldA, alignedA, B, ldB, beta, C, ldC, alignedC): init = '_blis_alpha = {}; _blis_beta = {};'.format(alpha, beta) parameters = [ self.bool2Trans(transA), @@ -91,13 +91,13 @@ def bool2Trans(self, trans): def sizeTrans(self, rows, cols, trans): return '{},{}'.format(cols,rows) if trans else '{},{}'.format(rows,cols) - def align(self, ld): + def align(self, ld, is_aligned): aligned = 'Unaligned' - if self._arch.checkAlignment(ld) and self._arch.alignment in [16,32,64,128]: + if is_aligned and self._arch.checkAlignment(ld) and self._arch.alignment in [16,32,64,128]: aligned = 'Aligned{}'.format(self._arch.alignment) return aligned - def call(self, transA, 
transB, M, N, K, alpha, A, ldA, B, ldB, beta, C, ldC): + def call(self, transA, transB, M, N, K, alpha, A, ldA, alignedA, B, ldB, beta, C, ldC, alignedC): AxB = '{alpha}_mapA{transA}*_mapB{transB}'.format( alpha=str(alpha) + '*' if alpha != 1.0 else '', transA=self.bool2Trans(transA), transB=self.bool2Trans(transB), @@ -122,7 +122,7 @@ def call(self, transA, transB, M, N, K, alpha, A, ldA, B, ldB, beta, C, ldC): sizeA=self.sizeTrans(M,K,transA), sizeB=self.sizeTrans(K,N,transB), ldA=ldA, ldB=ldB, ldC=ldC, A=A, B=B, C=C, - alignA=self.align(ldA), alignC=self.align(ldC), + alignA=self.align(ldA, alignedA), alignC=self.align(ldC, alignedC), code=code) return code diff --git a/yateto/memory.py b/yateto/memory.py index 837776a..48a755b 100644 --- a/yateto/memory.py +++ b/yateto/memory.py @@ -57,7 +57,7 @@ def __eq__(self, other): pass @abstractmethod - def isCompatible(self, spp): + def isCompatible(self, other, eqspp): pass class DenseMemoryLayout(MemoryLayout): @@ -271,11 +271,15 @@ def defuse(self, fusedRange, indices, I): stop -= B*s return ranges - def isCompatible(self, spp): - return BoundingBox.fromSpp(spp) in self.bbox() + def isCompatible(self, other, eqspp): + bb_contained = BoundingBox.fromSpp(eqspp) in self.bbox() + alignment_ok = self.alignedStride() == other.alignedStride() + return bb_contained and alignment_ok + def __eq__(self, other): - return self._stride == other._stride and self._bbox == other._bbox and self._stride == other._stride + return self._stride == other._stride and self._bbox == other._bbox and self._alignment == other._alignment + def __str__(self): return '{}(shape: {}, bounding box: {}, stride: {})'.format(type(self).__name__, self._shape, self._bbox, self._stride) @@ -355,8 +359,8 @@ def fromSpp(cls, spp, **kwargs): def __contains__(self, entry): return entry in self._bbox - def isCompatible(self, spp): - return self.fromSpp(spp) == self + def isCompatible(self, other, eqspp): + return other == self def __eq__(self, other): return 
self._bbox == other._bbox and np.array_equal(self._rowIndex, other._rowIndex) and np.array_equal(self._colPtr, other._colPtr) From 97573191bb3275e81701c983e91bf0e0d127e925 Mon Sep 17 00:00:00 2001 From: Carsten Uphoff Date: Mon, 8 Mar 2021 14:29:50 +0100 Subject: [PATCH 3/4] Bugfix: Do not assign new buffer if temporary variable is already mapped to a buffer --- yateto/controlflow/transformer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yateto/controlflow/transformer.py b/yateto/controlflow/transformer.py index e041389..73c16a6 100644 --- a/yateto/controlflow/transformer.py +++ b/yateto/controlflow/transformer.py @@ -124,7 +124,9 @@ def visit(self, cfg): ua = cfg[i].action # assign buffer if ua and not ua.isCompound() and ua.result.isLocal(): - if len(freeBuffers) > 0: + if ua.result in usedBuffers: + buf = usedBuffers[ua.result] + elif len(freeBuffers) > 0: buf = freeBuffers.pop() else: buf = numBuffers From 6920a2b602539baa503a959ab396f4918b2b439c Mon Sep 17 00:00:00 2001 From: Carsten Uphoff Date: Wed, 2 Jun 2021 17:38:49 +0200 Subject: [PATCH 4/4] Allow that the path to libxsmm may be given --- yateto/gemm_configuration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yateto/gemm_configuration.py b/yateto/gemm_configuration.py index c4e2fc4..9e491ab 100644 --- a/yateto/gemm_configuration.py +++ b/yateto/gemm_configuration.py @@ -133,8 +133,8 @@ def __init__(self, operation_name: str, includes: List[str], cmd: str, arch): self._arch = arch class LIBXSMM(CodeGenerator): - def __init__(self, arch, threshold: int = 128): - super().__init__('libxsmm', [], 'libxsmm_gemm_generator', arch) + def __init__(self, arch, cmd: str = 'libxsmm_gemm_generator', threshold: int = 128): + super().__init__('libxsmm', [], cmd, arch) self._threshold = threshold def _archSupported(self): @@ -159,8 +159,8 @@ def preference(self, m, n, k, sparseA, sparseB, transA, transB, alpha, beta, ali return Preference.LOW class 
PSpaMM(CodeGenerator): - def __init__(self, arch, threshold: int = 128): - super().__init__('pspamm', [], 'pspamm.py', arch) + def __init__(self, arch, cmd: str = 'pspamm.py', threshold: int = 128): + super().__init__('pspamm', [], cmd, arch) self._threshold = threshold def _archSupported(self):