diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 68611914b78e9..8959f7b56364e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -896,6 +896,7 @@ jobs:
         with:
           key: macos-${{ matrix.backend }}-minimal
           deps: testing_minimal
+          pydeps: "capstone"
           llvm: ${{ matrix.backend == 'llvm' && 'true' }}
       - name: Set env
         run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'metal' && 'METAL=1'}}" >> $GITHUB_ENV
@@ -910,6 +911,9 @@ jobs:
       - name: Run macOS-specific unit test
         if: matrix.backend == 'cpu'
         run: python3 -m pytest test/unit/test_disk_tensor.py::TestDiskTensor::test_copy_to_cpu_not_truncated
+      - name: fp16 test
+        if: matrix.backend == 'cpu'
+        run: DEBUG=7 python3 test/unit/test_cpu_fp16_alu.py TestFloat16Alu.test_cpu
 
   # ****** Windows Tests ******
 
diff --git a/test/unit/test_cpu_fp16_alu.py b/test/unit/test_cpu_fp16_alu.py
new file mode 100644
index 0000000000000..57b73d3894829
--- /dev/null
+++ b/test/unit/test_cpu_fp16_alu.py
@@ -0,0 +1,14 @@
+import unittest
+from tinygrad import Tensor, dtypes
+from tinygrad.device import Device
+from tinygrad.helpers import OSX
+
+class TestFloat16Alu(unittest.TestCase):
+  @unittest.skipUnless(Device.DEFAULT == "CPU" and OSX, "tests fp16 ALU codegen on the macOS CPU backend")
+  def test_cpu(self):
+    a = Tensor([1,2,3,4], dtype=dtypes.float16)
+    b = Tensor([1,2,3,4], dtype=dtypes.float16)
+    self.assertListEqual((a + b).tolist(), [2.0, 4.0, 6.0, 8.0])
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/tinygrad/renderer/cstyle.py b/tinygrad/renderer/cstyle.py
index 15592b8ecea8c..58dc991fa3077 100644
--- a/tinygrad/renderer/cstyle.py
+++ b/tinygrad/renderer/cstyle.py
@@ -193,7 +193,7 @@ class ClangRenderer(CStyleLanguage):
   # language options
   buffer_suffix = " restrict"
-  type_map = {dtypes.bool:"_Bool", dtypes.half:"__fp16"}
+  type_map = {dtypes.bool:"_Bool", dtypes.half:"_Float16"}
   code_for_op = {**({k:v for k,v in CStyleLanguage.code_for_op.items() if k not in [Ops.EXP2, Ops.SIN, Ops.LOG2]}), Ops.SQRT: lambda x,dtype: f"__builtin_sqrt({x})" if dtype == dtypes.float64 else f"__builtin_sqrtf({x})"}
 
   # LLVM legalizes double => half cast on systems that don't support it natively (like x86 cpus without AVX512-FP16) into a compiler-rt libcall.
diff --git a/tinygrad/runtime/ops_cpu.py b/tinygrad/runtime/ops_cpu.py
index e180a0a6b3b9e..3d677a12fe914 100644
--- a/tinygrad/runtime/ops_cpu.py
+++ b/tinygrad/runtime/ops_cpu.py
@@ -1,5 +1,5 @@
-import functools, platform, subprocess, sys
-from tinygrad.helpers import capstone_flatdump, getenv
+import functools, platform, subprocess, sys, ctypes
+from tinygrad.helpers import capstone_flatdump, getenv, OSX
 from tinygrad.device import Compiled, Compiler, MallocAllocator, CPUProgram
 from tinygrad.runtime.support.elf import jit_loader
 from tinygrad.renderer.cstyle import ClangRenderer
@@ -11,7 +11,7 @@ def compile(self, src:str) -> bytes:
     # -fno-math-errno is required for __builtin_sqrt to become an instruction instead of a function call
     # x18 is a reserved platform register. It is clobbered on context switch in macos and is used to store TEB pointer in windows on arm, don't use it
     target = 'x86_64' if sys.platform == 'win32' else platform.machine()
-    args = ['-march=native', f'--target={target}-none-unknown-elf', '-O2', '-fPIC', '-ffreestanding', '-fno-math-errno', '-nostdlib', '-fno-ident']
+    args = ['-mcpu=native', f'--target={target}-none-unknown-elf', '-O2', '-fPIC', '-ffreestanding', '-fno-math-errno', '-nostdlib', '-fno-ident']
     arch_args = ['-ffixed-x18'] if target == 'arm64' else []
     obj = subprocess.check_output([getenv("CC", 'clang'), '-c', '-x', 'c', *args, *arch_args, '-', '-o', '-'], input=src.encode('utf-8'))
     return jit_loader(obj)