SeisSol · davschneller · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025 · Apr 16, 2025
diff --git a/.github/workflows/yateto-cpu.yml b/.github/workflows/yateto-cpu.yml
@@ -73,7 +73,7 @@ jobs:
     - name: Codegen Tests
       run: |
         cd ./tests/code-gen
-        for example in matmul minimal indices slicing; do
+        for example in matmul minimal indices slicing elementwise reduction datatype conditional; do
           for build_type in Debug Release; do
             for precision in single double; do
               echo " ====== Test Config: ======"

diff --git a/include/yateto.h b/include/yateto.h
@@ -5,5 +5,6 @@
 #include "yateto/LinearAllocator.h"
 #include "yateto/Misc.h"
 #include "yateto/TensorView.h"
+#include "yateto/Type.h"
 
 #endif
diff --git a/include/yateto/LinearAllocator.h b/include/yateto/LinearAllocator.h
@@ -13,6 +13,12 @@ struct LinearAllocatorT {
     userSpaceMem = ptr;
   }
 
+  template <typename S>  // S: pointee type of the buffer handed in by the caller
+  void initialize(S* ptr) {
+    isInit = true;  // checked by the assert in allocate()
+    userSpaceMem = reinterpret_cast<T*>(ptr);  // the address is reinterpreted as T*, not converted
+  }
+
   T* allocate(size_t size) {
     assert(isInit && "YATETO: Temporary-Memory manager hasn't been initialized");
     int currentByteCount = byteCount;

diff --git a/include/yateto/Type.h b/include/yateto/Type.h
@@ -0,0 +1,55 @@
+#ifndef YATETO_TYPE_H_
+#define YATETO_TYPE_H_
+
+// Request the TS 18661-3 _FloatN limit macros (FLT16_MIN, FLT128_MIN, ...) from <cfloat>.
+// The macro must be defined before <float.h> is first included, and any standard header may
+// include it, so it is set ahead of all includes. cf. https://stackoverflow.com/a/70868019
+#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
+#define __STDC_WANT_IEC_60559_TYPES_EXT__
+#endif
+
+#include <cfloat>
+#include <cstddef>
+
+// C++23 include (__has_include itself is only guaranteed to exist from C++17 on)
+#ifdef __has_include
+#if __has_include(<stdfloat>)
+#include <stdfloat>
+#endif
+#endif
+
+namespace yateto {
+
+// Aliases for floating point types beyond float/double. Where available, the C++23 <stdfloat>
+// type is preferred, then the _FloatN keyword if <cfloat> advertises it, then a compiler-specific
+// spelling. NOTE(review): the last fallback of each chain is a compiler extension; on a
+// compiler/target without it this header presumably fails to compile -- confirm the toolchains.
+
+// 128-bit floating point
+#ifdef __STDCPP_FLOAT128_T__
+using f128_ty = std::float128_t;
+#elif defined(FLT128_MIN)
+using f128_ty = _Float128;
+#else
+using f128_ty = __float128;
+#endif
+
+// 16-bit floating point
+#ifdef __STDCPP_FLOAT16_T__
+using f16_ty = std::float16_t;
+#elif defined(FLT16_MIN)
+using f16_ty = _Float16;
+#else
+using f16_ty = __fp16;
+#endif
+
+// bfloat16
+#ifdef __STDCPP_BFLOAT16_T__
+using bf16_ty = std::bfloat16_t;
+#else
+using bf16_ty = __bf16;
+#endif
+
+} // namespace yateto
+
+#endif // YATETO_TYPE_H_
diff --git a/tests/code-gen/conditional.py b/tests/code-gen/conditional.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+from yateto import *
+
+import yateto.functions as yf
+
+def add(g):
+  """Add the yf.assignIf test kernels to g.
+
+  Every kernel (or list of kernels) is passed to g.add under the names
+  kernel1, kernel2, ... in the order in which it appears below.
+  """
+  N = 8
+  shape = (N, N)
+  A, B, C = (Tensor(name, shape) for name in ('A', 'B', 'C'))
+  # NOTE(review): CI and X3 are declared but not referenced by any kernel below.
+  AI, BI, CI = (Tensor(name, shape, datatype=Datatype.I32) for name in ('AI', 'BI', 'CI'))
+  AB = Tensor('AB', shape, datatype=Datatype.BOOL)
+
+  # shape-() BOOL tensors, passed as the first argument of yf.assignIf
+  X, X1, X2, X3 = (Tensor(name, (), datatype=Datatype.BOOL) for name in ('X', 'X1', 'X2', 'X3'))
+
+  registered = 0
+
+  def _(kernel):
+    nonlocal registered
+    registered += 1
+    g.add(f'kernel{registered}', kernel)
+
+  _(yf.assignIf(X[''], A['ij'], yf.sqrt(B['ij'])))
+  _(yf.assignIf(yf.all(AB['ij'], 'ij'), A['ij'], yf.sqrt(B['ij'])))
+  _([
+    yf.assignIf(X[''], A['ij'], yf.sqrt(B['ij'])),
+    yf.assignIf(X[''], AI['ij'], -BI['ij'])
+  ])
+  _([
+    yf.assignIf(X1[''], A['ij'], B['ik'] * C['kj'] + C['ij']),
+    yf.assignIf(X1[''], A['ij'], A['ij'] + B['ik'] * C['kj'] + C['ij']),
+    yf.assignIf(X2[''], C['ij'], yf.sqrt(B['ij']))
+  ])
diff --git a/tests/code-gen/datatype.py b/tests/code-gen/datatype.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+
+from yateto import *
+
+import yateto.functions as yf
+
+def add(g):
+  """Add the datatype test kernel to g as kernel1: AI is assigned yf.cast(A, Datatype.I32)."""
+  N = 8
+  shape = (N, N)
+  # NOTE(review): only A and AI are referenced by the kernel below.
+  A, B, C = (Tensor(name, shape) for name in ('A', 'B', 'C'))
+  AI, BI, CI = (Tensor(name, shape, datatype=Datatype.I32) for name in ('AI', 'BI', 'CI'))
+  AB = Tensor('AB', shape, datatype=Datatype.BOOL)
+
+  registered = 0
+
+  def _(kernel):
+    nonlocal registered
+    registered += 1
+    g.add(f'kernel{registered}', kernel)
+
+  _(AI['ij'] <= yf.cast(A['ij'], Datatype.I32))
diff --git a/tests/code-gen/elementwise.py b/tests/code-gen/elementwise.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+
+from yateto import *
+
+import yateto.functions as yf
+
+def add(g):
+  """Add the element-wise function test kernels to g.
+
+  Every kernel is passed to g.add under the names kernel1, kernel2, ... in the
+  order in which it appears below.
+  """
+  N = 8
+  shape = (N, N)
+  A, B, C = (Tensor(name, shape) for name in ('A', 'B', 'C'))
+  AI, BI, CI = (Tensor(name, shape, datatype=Datatype.I32) for name in ('AI', 'BI', 'CI'))
+  AB = Tensor('AB', shape, datatype=Datatype.BOOL)
+
+  registered = 0
+
+  def _(kernel):
+    nonlocal registered
+    registered += 1
+    g.add(f'kernel{registered}', kernel)
+
+  _(A['ij'] <= yf.sqrt(B['ij']))
+  _(A['ij'] <= yf.sqrt(B['ij']) + yf.sin(C['ij']))
+  _(A['ij'] <= yf.sqrt(B['ij']) * yf.sin(C['ij']))
+  _(A['ij'] <= yf.minimum(B['ij'], C['ij']))
+  _(A['ij'] <= yf.minimum(B['ij'], C['ij'] + yf.atanh(B['ij'])))
+
+  _(AI['ij'] <= BI['ij'] + CI['ij'])
+  _(AI['ij'] <= yf.bitwise_and(BI['ij'], CI['ij']))
+
+  _(AB['ij'] <= yf.greater_equal(BI['ij'], CI['ij']))
+  _(A['ij'] <= yf.where(yf.greater_equal(BI['ij'], CI['ij']), B['ij'], C['ij']))
+
+  _(AI['ij'] <= yf.cast(A['ij'], Datatype.I32))
diff --git a/tests/code-gen/reduction.py b/tests/code-gen/reduction.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+from yateto import *
+
+import yateto.functions as yf
+
+def add(g):
+  """Add the reduction (yf.sum/min/all/any) test kernels to g as kernel1, kernel2, ..."""
+  N = 8
+  # one tensor per rank 0, 1, 2 and per datatype; the shape is (N,) repeated rank times
+  # NOTE(review): AI1 and AB1 are declared but not referenced by any kernel below.
+  ranks = range(3)
+  A0, A1, A2 = (Tensor(f'A{r}', (N,) * r) for r in ranks)
+  AI0, AI1, AI2 = (Tensor(f'AI{r}', (N,) * r, datatype=Datatype.I32) for r in ranks)
+  AB0, AB1, AB2 = (Tensor(f'AB{r}', (N,) * r, datatype=Datatype.BOOL) for r in ranks)
+
+  registered = 0
+
+  def _(kernel):
+    nonlocal registered
+    registered += 1
+    g.add(f'kernel{registered}', kernel)
+
+  _(A0[''] <= yf.sum(A1['i'], 'i'))
+  _(A0[''] <= yf.sum(A2['ij'], 'ij'))
+  _(A0[''] <= yf.min(A2['ij'], 'ij'))
+
+  _(AI0[''] <= yf.sum(AI2['ij'], 'ij'))
+  _(AI0[''] <= yf.min(AI2['ij'], 'ij'))
+  _(AI0[''] <= yf.all(AI2['ij'], 'ij'))
+  _(AI0[''] <= yf.any(AI2['ij'], 'ij'))
+
+  _(AB0[''] <= yf.all(AB2['ij'], 'ij'))
+  _(AB0[''] <= yf.any(AB2['ij'], 'ij'))
diff --git a/yateto/arch.py b/yateto/arch.py
@@ -38,6 +38,7 @@
 #
 
 from .memory import DenseMemoryLayout
+from .type import Datatype
 from collections import namedtuple
 from typing import Union
 import re
@@ -69,19 +70,20 @@ def __init__(self,
     self.host_name = host_name
 
     self.precision = precision.upper()
-    if self.precision == 'D':
-      self.bytesPerReal = 8
-      self.typename = 'double'
-      self.epsilon = 2.22e-16
+    if self.precision == 'Q':
+      self.epsilon = 2**-112
+      self.datatype = Datatype.F128
+    elif self.precision == 'D':
+      self.epsilon = 2**-52
+      self.datatype = Datatype.F64
     elif self.precision == 'S':
-      self.bytesPerReal = 4
-      self.typename = 'float'
-      self.epsilon = 1.19e-7
+      self.epsilon = 2**-23
+      self.datatype = Datatype.F32
     else:
       raise ValueError(f'Unknown precision type {self.precision}')
     self.alignment = alignment
-    assert self.alignment % self.bytesPerReal == 0
-    self.alignedReals = self.alignment // self.bytesPerReal
+    assert self.alignment % self.datatype.size() == 0
+    self.alignedReals = self.alignment // self.datatype.size()
     self.enablePrefetch = enablePrefetch
 
     self.uintTypename = 'unsigned'
@@ -110,10 +112,10 @@ def checkAlignment(self, offset):
     return offset % self.alignedReals == 0
 
   def formatConstant(self, constant):
-    return str(constant) + ('f' if self.precision == 'S' else '')
+    return self.datatype.literal(constant)  # formatting of the literal is delegated to the datatype
 
-  def onHeap(self, numReals):
-    return (numReals * self.bytesPerReal) > self._tmpStackLimit
+  def onHeap(self, byteCount):  # takes a size in bytes (previously: a number of reals)
+    return byteCount > self._tmpStackLimit  # True iff the size exceeds self._tmpStackLimit
 
   def __eq__(self, other):
     return self.name == other.name