Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions yateto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .generator import NamespacedGenerator, Generator, simpleParameterSpace, parameterSpaceFromRanges
from .arch import useArchitectureIdentifiedBy
from .gemm_configuration import *
from .memory import *
5 changes: 3 additions & 2 deletions yateto/codegen/gemm/gemmgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,10 @@ def generate(self, cpp, routineCache):
cpp( self._gemm_cfg.call(d.transA,
d.transB,
m.size(), n.size(), k.size(),
d.alpha, self._pointer(d.leftTerm, (m.start, k.start), d.transA), ldA,
d.alpha, self._pointer(d.leftTerm, (m.start, k.start), d.transA),
ldA, d.alignedA,
self._pointer(d.rightTerm, (k.start, n.start), d.transB), ldB,
d.beta, self._pointer(d.result, (m.start, n.start), False), ldC))
d.beta, self._pointer(d.result, (m.start, n.start), False), ldC, d.alignedC))

elif isinstance(self._gemm_cfg, GemmForge):

Expand Down
4 changes: 2 additions & 2 deletions yateto/codegen/visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def generate(self, cpp, cfg, factory, routineCache, gemm_cfg):
# an provided by the user
required_tmp_mem = 0
cfg = DetermineLocalInitialization().visit(cfg)
localPtrs = list()
localPtrs = set()
for pp in cfg:
localPtrs.extend(pp.bufferMap.keys())
localPtrs.update(pp.bufferMap.keys())
if localPtrs:
cpp( '{}{};'.format(self._arch.typename, ','.join(map(lambda x: ' *' + str(x), localPtrs))) )
for pp in cfg:
Expand Down
8 changes: 4 additions & 4 deletions yateto/controlflow/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ def variables(self):
return {self}

def maySubstitute(self, when, by):
return self.substituted(when, by).memoryLayout().isCompatible(self.eqspp())
return self.substituted(when, by).memoryLayout().isCompatible(self.memoryLayout(), self.eqspp())

def substituted(self, when, by, memoryLayout=None):
    """Return the replacement for this object under a substitution.

    :param when: object to be replaced
    :param by: object returned when this object compares equal to ``when``
    :param memoryLayout: accepted but not used by this implementation
    :return: ``by`` if ``self == when``, otherwise ``self`` unchanged
    """
    if self == when:
        return by
    return self

def resultCompatible(self, result):
return result.memoryLayout().isCompatible(self.eqspp())
return result.memoryLayout().isCompatible(self.memoryLayout(), self.eqspp())

def isGlobal(self):
    """Return True when a tensor is attached (``self.tensor`` is not None)."""
    has_tensor = self.tensor is not None
    return has_tensor
Expand Down Expand Up @@ -71,15 +71,15 @@ def variableList(self):

def maySubstitute(self, when, by):
layouts = [var.substituted(when, by).memoryLayout() for var in self._variables]
c1 = all(layouts[i].isCompatible(var.eqspp()) for i,var in enumerate(self._variables))
c1 = all(layouts[i].isCompatible(var.memoryLayout(), var.eqspp()) for i,var in enumerate(self._variables))
c2 = self.node.argumentsCompatible(layouts)
return c1 and c2

def substituted(self, when, by, memoryLayout):
    """Build a new Expression with the substitution applied to every variable.

    :param when: object to be replaced
    :param by: replacement object
    :param memoryLayout: memory layout passed on to the new Expression
    :return: Expression over the same node whose variables are the
             substituted versions of this expression's variables
    """
    node = self.node
    replaced = [var.substituted(when, by) for var in self._variables]
    return Expression(node, memoryLayout, replaced)

def resultCompatible(self, result):
c1 = result.memoryLayout().isCompatible(self.eqspp())
c1 = result.memoryLayout().isCompatible(self.memoryLayout(), self.eqspp())
c2 = self.node.resultCompatible(result.memoryLayout())
return c1 and c2

Expand Down
4 changes: 3 additions & 1 deletion yateto/controlflow/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ def visit(self, cfg):
ua = cfg[i].action
# assign buffer
if ua and not ua.isCompound() and ua.result.isLocal():
if len(freeBuffers) > 0:
if ua.result in usedBuffers:
buf = usedBuffers[ua.result]
elif len(freeBuffers) > 0:
buf = freeBuffers.pop()
else:
buf = numBuffers
Expand Down
20 changes: 10 additions & 10 deletions yateto/gemm_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def supported(self, m, n, k, sparseA, sparseB, transA, transB, alpha,
def bool2Trans(self, trans):
    """Map a transpose flag to the corresponding CBLAS enumerator name.

    :param trans: truthy if the operand is transposed
    :return: 'CblasTrans' if ``trans`` is truthy, else 'CblasNoTrans'
    """
    infix = '' if trans else 'No'
    return 'Cblas{}Trans'.format(infix)

def call(self, transA, transB, M, N, K, alpha, A, ldA, B, ldB, beta, C, ldC):
def call(self, transA, transB, M, N, K, alpha, A, ldA, alignedA, B, ldB, beta, C, ldC, alignedC):
parameters = [
'CblasColMajor',
self.bool2Trans(transA),
Expand All @@ -65,7 +65,7 @@ def __init__(self, arch):
def bool2Trans(self, trans):
    """Map a transpose flag to the corresponding BLIS constant name.

    :param trans: truthy if the operand is transposed
    :return: 'BLIS_TRANSPOSE' if ``trans`` is truthy, else 'BLIS_NO_TRANSPOSE'
    """
    if trans:
        separator = '_'
    else:
        separator = '_NO_'
    return 'BLIS{}TRANSPOSE'.format(separator)

def call(self, transA, transB, M, N, K, alpha, A, ldA, B, ldB, beta, C, ldC):
def call(self, transA, transB, M, N, K, alpha, A, ldA, alignedA, B, ldB, beta, C, ldC, alignedC):
init = '_blis_alpha = {}; _blis_beta = {};'.format(alpha, beta)
parameters = [
self.bool2Trans(transA),
Expand All @@ -91,13 +91,13 @@ def bool2Trans(self, trans):
def sizeTrans(self, rows, cols, trans):
    """Format a matrix size as 'rows,cols', swapping the two when transposed.

    :param rows: number of rows of the untransposed matrix
    :param cols: number of columns of the untransposed matrix
    :param trans: truthy if the matrix is transposed
    :return: 'cols,rows' if ``trans`` is truthy, else 'rows,cols'
    """
    first, second = (cols, rows) if trans else (rows, cols)
    return '{},{}'.format(first, second)

def align(self, ld):
def align(self, ld, is_aligned):
aligned = 'Unaligned'
if self._arch.checkAlignment(ld) and self._arch.alignment in [16,32,64,128]:
if is_aligned and self._arch.checkAlignment(ld) and self._arch.alignment in [16,32,64,128]:
aligned = 'Aligned{}'.format(self._arch.alignment)
return aligned

def call(self, transA, transB, M, N, K, alpha, A, ldA, B, ldB, beta, C, ldC):
def call(self, transA, transB, M, N, K, alpha, A, ldA, alignedA, B, ldB, beta, C, ldC, alignedC):
AxB = '{alpha}_mapA{transA}*_mapB{transB}'.format(
alpha=str(alpha) + '*' if alpha != 1.0 else '',
transA=self.bool2Trans(transA), transB=self.bool2Trans(transB),
Expand All @@ -122,7 +122,7 @@ def call(self, transA, transB, M, N, K, alpha, A, ldA, B, ldB, beta, C, ldC):
sizeA=self.sizeTrans(M,K,transA),
sizeB=self.sizeTrans(K,N,transB),
ldA=ldA, ldB=ldB, ldC=ldC, A=A, B=B, C=C,
alignA=self.align(ldA), alignC=self.align(ldC),
alignA=self.align(ldA, alignedA), alignC=self.align(ldC, alignedC),
code=code)
return code

Expand All @@ -133,8 +133,8 @@ def __init__(self, operation_name: str, includes: List[str], cmd: str, arch):
self._arch = arch

class LIBXSMM(CodeGenerator):
def __init__(self, arch, threshold: int = 128):
super().__init__('libxsmm', [], 'libxsmm_gemm_generator', arch)
def __init__(self, arch, cmd: str = 'libxsmm_gemm_generator', threshold: int = 128):
super().__init__('libxsmm', [], cmd, arch)
self._threshold = threshold

def _archSupported(self):
Expand All @@ -159,8 +159,8 @@ def preference(self, m, n, k, sparseA, sparseB, transA, transB, alpha, beta, ali
return Preference.LOW

class PSpaMM(CodeGenerator):
def __init__(self, arch, threshold: int = 128):
super().__init__('pspamm', [], 'pspamm.py', arch)
def __init__(self, arch, cmd: str = 'pspamm.py', threshold: int = 128):
super().__init__('pspamm', [], cmd, arch)
self._threshold = threshold

def _archSupported(self):
Expand Down
57 changes: 46 additions & 11 deletions yateto/memory.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
from .ast.indices import BoundingBox, Range
import copy
from enum import Enum
import itertools
import warnings
import numpy as np
from abc import ABC, abstractmethod

class Alignment(Enum):
    """Alignment mode.

    Automatic: Assume aligned memory if stride is divisible by vector width.
    Aligned: Pad the leading dimension with zeros such that stride is divisible by memory width.
    Unaligned: Always assume unaligned memory access.
    """
    # No trailing commas here: writing `Automatic = 0,` makes the member's
    # value the tuple (0,) instead of the integer 0, which leaves the members
    # with inconsistent value types and breaks lookups such as Alignment(0).
    Automatic = 0
    Aligned = 1
    Unaligned = 2

class MemoryLayout(ABC):
def __init__(self, shape):
self._shape = shape
Expand Down Expand Up @@ -45,7 +57,7 @@ def __eq__(self, other):
pass

@abstractmethod
def isCompatible(self, spp):
def isCompatible(self, other, eqspp):
pass

class DenseMemoryLayout(MemoryLayout):
Expand All @@ -55,16 +67,32 @@ class DenseMemoryLayout(MemoryLayout):
def setAlignmentArch(cls, arch):
cls.ALIGNMENT_ARCH = arch

def __init__(self, shape, boundingBox=None, stride=None, alignStride=False):
def __init__(self, shape, boundingBox=None, stride=None, alignStride=Alignment.Automatic):
"""Construct DenseMemoryLayout.

:param shape: tensor shape (tuple of integers)
:param boundingBox: Non-zero BoundingBox, covers complete tensor if None
:param stride: Stride of the leading dimension, computed automatically if None
:param alignStride: Alignment mode. Passing False is equal to Alignment.Automatic and passing
True is equal to Alignment.Aligned.
"""
super().__init__(shape)

if boundingBox:
self._bbox = boundingBox
else:
self._bbox = BoundingBox([Range(0, s) for s in self._shape])

if alignStride == True:
self._alignment = Alignment.Aligned
elif alignStride == False:
self._alignment = Alignment.Automatic
elif isinstance(alignStride, Alignment):
self._alignment = alignStride
else:
raise ValueError("Unknown type for option alignStride")
self._range0 = None
if alignStride:
if self._alignment == Alignment.Aligned:
self._alignBB()

if stride:
Expand All @@ -86,8 +114,11 @@ def _alignBB(self):
else:
warnings.warn('Set architecture with DenseMemoryLayout.setAlignmentArch(arch) if you want to use the align stride feature.', UserWarning)

def alignment(self):
    """Return the alignment mode stored on this layout (``self._alignment``)."""
    mode = self._alignment
    return mode

def alignedStride(self):
if self.ALIGNMENT_ARCH is None:
if self.ALIGNMENT_ARCH is None or self._alignment == Alignment.Unaligned:
return False
offsetOk = self.ALIGNMENT_ARCH.checkAlignment(self._bbox[0].start)
ldOk = self._stride[0] == 1 and (len(self._stride) == 1 or self.ALIGNMENT_ARCH.checkAlignment(self._stride[1]))
Expand All @@ -99,7 +130,7 @@ def mayVectorizeDim(self, dim):
return self.ALIGNMENT_ARCH.checkAlignment(self._bbox[dim].size())

@classmethod
def fromSpp(cls, spp, alignStride=False):
def fromSpp(cls, spp, alignStride=Alignment.Automatic):
bbox = BoundingBox.fromSpp(spp)
return cls(spp.shape, bbox, alignStride=alignStride)

Expand All @@ -111,7 +142,7 @@ def permuted(self, permutation):

originalBB = BoundingBox([self._range0] + self._bbox[1:]) if self._range0 else self._bbox
newBB = BoundingBox([copy.copy(originalBB[p]) for p in permutation])
return DenseMemoryLayout(newShape, newBB, alignStride=self._range0 is not None)
return DenseMemoryLayout(newShape, newBB, alignStride=self._alignment)

def address(self, entry):
assert entry in self._bbox
Expand Down Expand Up @@ -240,11 +271,15 @@ def defuse(self, fusedRange, indices, I):
stop -= B*s
return ranges

def isCompatible(self, spp):
return BoundingBox.fromSpp(spp) in self.bbox()
def isCompatible(self, other, eqspp):
bb_contained = BoundingBox.fromSpp(eqspp) in self.bbox()
alignment_ok = self.alignedStride() == other.alignedStride()
return bb_contained and alignment_ok


def __eq__(self, other):
return self._stride == other._stride and self._bbox == other._bbox and self._stride == other._stride
return self._stride == other._stride and self._bbox == other._bbox and self._alignment == other._alignment


def __str__(self):
return '{}(shape: {}, bounding box: {}, stride: {})'.format(type(self).__name__, self._shape, self._bbox, self._stride)
Expand Down Expand Up @@ -324,8 +359,8 @@ def fromSpp(cls, spp, **kwargs):
def __contains__(self, entry):
return entry in self._bbox

def isCompatible(self, spp):
return self.fromSpp(spp) == self
def isCompatible(self, other, eqspp):
return other == self

def __eq__(self, other):
return self._bbox == other._bbox and np.array_equal(self._rowIndex, other._rowIndex) and np.array_equal(self._colPtr, other._colPtr)
8 changes: 4 additions & 4 deletions yateto/type.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from .ast.node import Node, IndexedTensor
from numpy import ndarray, zeros, float64
from .memory import DenseMemoryLayout
from .memory import DenseMemoryLayout, Alignment
from . import aspp

class AbstractType(object):
Expand Down Expand Up @@ -35,7 +35,7 @@ def __init__(self,
shape,
spp=None,
memoryLayoutClass=DenseMemoryLayout,
alignStride=False,
alignStride=Alignment.Automatic,
namespace=None):
if not isinstance(shape, tuple):
raise ValueError('shape must be a tuple')
Expand Down Expand Up @@ -77,7 +77,7 @@ def __init__(self,

self.setMemoryLayout(memoryLayoutClass, alignStride)

def setMemoryLayout(self, memoryLayoutClass, alignStride=False):
def setMemoryLayout(self, memoryLayoutClass, alignStride=Alignment.Automatic):
self._memoryLayout = memoryLayoutClass.fromSpp(self._groupSpp, alignStride=alignStride)

def _setSparsityPattern(self, spp, setOnlyGroupSpp=False):
Expand All @@ -90,7 +90,7 @@ def _setSparsityPattern(self, spp, setOnlyGroupSpp=False):

def setGroupSpp(self, spp):
self._setSparsityPattern(spp, setOnlyGroupSpp=True)
self.setMemoryLayout(self._memoryLayout.__class__, alignStride=self._memoryLayout.alignedStride())
self.setMemoryLayout(self._memoryLayout.__class__, alignStride=self._memoryLayout.alignment())

def __getitem__(self, indexNames):
return IndexedTensor(self, indexNames)
Expand Down