diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 4e6812f..a0fef01 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -22,7 +22,7 @@ jobs:
             core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
             core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
 
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
         with:
           submodules: true
 
@@ -60,118 +60,41 @@ jobs:
   Build-mac:
     runs-on: macos-latest
     steps:
-      - name: Export GitHub Actions cache environment variables
-        uses: actions/github-script@v7
-        with:
-          script: |
-            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
-            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
-
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
         with:
           submodules: true
 
-      - name: Install system-wide build tools
-        shell: bash
-        # Install
-        #   mono: NuGet requires a dotnet runtime
-        #   ninja: Build system
-        #   llvm: Just for clang-tidy. Need to add to path.
-        # Just add clang-tidy to path, not all of LLVM clang.
-        run: |
-          brew install mono ninja llvm
-          ln -s $(brew --prefix llvm)/bin/clang-tidy /usr/local/bin/clang-tidy
-          brew install autoconf autoconf-archive automake libtool
+      - name: Install dependencies  # Meson + Ninja, plus the libraries the meson.build files resolve via dependency()
+        run: brew install meson ninja fftw armadillo googletest google-benchmark pybind11
 
-      - name: Setup VCPKG
-        shell: bash
-        run: |
-          cd ${{ github.workspace }}
-          git clone https://github.com/microsoft/vcpkg
-          ${{ github.workspace }}/vcpkg/bootstrap-vcpkg.sh
+      - name: Meson setup  # configure a release build tree in ./builddir
+        run: meson setup builddir --buildtype=release
 
-      - name: Add NuGet sources
-        shell: bash
-        env:
-          gh_packages_secret: ${{ secrets.GH_PACKAGES_TOKEN }}
-        if: ${{ env.gh_packages_secret != '' }}
-        run: |
-          mono `${{ env.VCPKG_EXE }} fetch nuget | tail -n 1` \
-            sources add \
-            -Source "${{ env.FEED_URL }}" \
-            -StorePasswordInClearText \
-            -Name GitHubPackages \
-            -UserName "${{ env.USERNAME }}" \
-            -Password "${{ secrets.GH_PACKAGES_TOKEN }}"
-          mono `${{ env.VCPKG_EXE }} fetch nuget | tail -n 1` \
-            setapikey "${{ secrets.GH_PACKAGES_TOKEN }}" \
-            -Source "${{ env.FEED_URL }}"
+      - name: Build  # compile the targets configured in ./builddir
+        run: meson compile -C builddir
 
-      - name: CMake configure
-        shell: bash
-        run: cmake --preset clang
-
-      - name: CMake build
-        shell: bash
-        run: cmake --build --preset clang-release
-
-      - name: CTest
-        shell: bash
-        run: ctest --output-on-failure --test-dir build/clang/test/
+      - name: Test  # run the tests registered with test(); --print-errorlogs shows the log of any failing test
+        run: meson test -C builddir --print-errorlogs
 
   Build-linux:
     runs-on: ubuntu-24.04
     steps:
-      - name: Export GitHub Actions cache environment variables
-        uses: actions/github-script@v7
-        with:
-          script: |
-            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
-            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
-
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
         with:
           submodules: true
 
-      - name: Install system dependencies
-        shell: bash
+      - name: Install dependencies  # Meson + Ninja + clang, plus the -dev packages for the libraries the meson.build files resolve via dependency()
         run: |
           sudo apt-get update
-          sudo apt-get install -y mono-devel ninja-build pkg-config cmake clang clang-tidy
-
-      - name: Setup VCPKG
-        shell: bash
-        run: |
-          cd ${{ github.workspace }}
-          git clone https://github.com/microsoft/vcpkg
-          ${{ github.workspace }}/vcpkg/bootstrap-vcpkg.sh
+          sudo apt-get install -y meson ninja-build pkg-config clang \
+            libfftw3-dev libarmadillo-dev libgtest-dev libbenchmark-dev \
+            pybind11-dev
 
-      - name: Add NuGet sources
-        shell: bash
-        env:
-          gh_packages_secret: ${{ secrets.GH_PACKAGES_TOKEN }}
-        if: ${{ env.gh_packages_secret != '' }}
-        run: |
-          # Use nuget via mono (since mono is not pre-installed on ubuntu, and vcpkg fetches the Windows nuget binary)
-          mono `${{ env.VCPKG_EXE }} fetch nuget | tail -n 1` \
-            sources add \
-            -Source "${{ env.FEED_URL }}" \
-            -StorePasswordInClearText \
-            -Name GitHubPackages \
-            -UserName "${{ env.USERNAME }}" \
-            -Password "${{ secrets.GH_PACKAGES_TOKEN }}"
-          mono `${{ env.VCPKG_EXE }} fetch nuget | tail -n 1` \
-            setapikey "${{ secrets.GH_PACKAGES_TOKEN }}" \
-            -Source "${{ env.FEED_URL }}"
+      - name: Meson setup  # CC/CXX make Meson configure with the clang installed by the previous step
+        run: CC=clang CXX=clang++ meson setup builddir --buildtype=release
 
-      - name: CMake configure
-        shell: bash
-        run: cmake --preset clang
-
-      - name: CMake build
-        shell: bash
-        run: cmake --build --preset clang-release
+      - name: Build  # compile the targets configured in ./builddir
+        run: meson compile -C builddir
 
-      - name: CTest
-        shell: bash
-        run: ctest --output-on-failure --test-dir build/clang/test/
+      - name: Test  # run the tests registered with test(); --print-errorlogs shows the log of any failing test
+        run: meson test -C builddir --print-errorlogs
diff --git a/.gitignore b/.gitignore
index b7aed4d..2b07d51 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,7 +13,7 @@ __pycache__/
 
 # Distribution / packaging
 .Python
-build/
+build*/
 develop-eggs/
 dist/
 downloads/
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 9e26dfe..b346d75 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1 +1,3 @@
-{}
\ No newline at end of file
+{
+    "C_Cpp.default.configurationProvider": "mesonbuild.mesonbuild"
+}
\ No newline at end of file
diff --git a/benchmark/meson.build b/benchmark/meson.build
new file mode 100644
index 0000000..cee8511
--- /dev/null
+++ b/benchmark/meson.build
@@ -0,0 +1,9 @@
+benchmark_dep = dependency('benchmark')  # Google Benchmark (CI installs google-benchmark / libbenchmark-dev)
+armadillo_dep = dependency('armadillo')  # same lookup as in test/meson.build; repeated so this file does not rely on it
+
+executable('bench_fftconv',  # NOTE(review): not registered via benchmark(), so `meson benchmark` will not run it - confirm intended
+  'bench_fftconv.cpp',
+  'bench_hilbert.cpp',
+  include_directories : fftconv_inc,  # assigned in the top-level meson.build
+  dependencies : [fftw3_dep, fftw3f_dep, benchmark_dep, armadillo_dep],  # fftw3_dep / fftw3f_dep also come from the top-level meson.build
+)
diff --git a/include/fftconv/fftconv.hpp b/include/fftconv/fftconv.hpp
index dc3e923..d32e4a4 100644
--- a/include/fftconv/fftconv.hpp
+++ b/include/fftconv/fftconv.hpp
@@ -8,10 +8,8 @@
 #include <complex>
 #include <fftconv/aligned_vector.hpp>
 #include <fftconv/fftw.hpp>
-#include <memory>
 #include <span>
 #include <type_traits>
-#include <unordered_map>
 
 // NOLINTBEGIN(*-reinterpret-cast, *-const-cast, *-pointer-arithmetic)
 
@@ -386,13 +384,12 @@ struct FFTConvEngine : public fftw::cache_mixin<FFTConvEngine<T, PlannerFlag>> {
 
     const size_t fft_size = buf.real.size();
     const size_t step_size = fft_size - (k.size() - 1);
+    const T fct = static_cast<T>(1. / fft_size);
 
     // forward fft of kernel and save to complex2
     internal::copy_to_padded_buffer<T>(k, buf.real);
     forward.execute_dft_r2c(buf.real_ptr(), buf.cx2_ptr());
 
-    const auto fct = static_cast<T>(1. / fft_size);
-
     if constexpr (Mode == ConvMode::Full) {
       assert(a.size() + k.size() - 1 == out.size());
 
@@ -450,7 +447,7 @@ struct FFTConvEngine : public fftw::cache_mixin<FFTConvEngine<T, PlannerFlag>> {
 //    * Cache fftw_plan
 //    * Reuse buffers (no malloc on second call to the same convolution size)
 // https://en.wikipedia.org/w/index.php?title=Convolution#Fast_convolution_algorithms
-template <Floating T, ConvMode Mode = ConvMode::Same,
+template <Floating T, ConvMode Mode = ConvMode::Full,
           int PlannerFlag = FFTW_ESTIMATE>
 void convolve_fftw(const std::span<const T> input,
                    const std::span<const T> kernel, std::span<T> output) {
@@ -471,7 +468,7 @@ For "Same" mode, output_size == input_size
 2. convolve with kernel using fft of length N.
 3. add blocks together
  */
-template <Floating T, ConvMode Mode = ConvMode::Same,
+template <Floating T, ConvMode Mode = ConvMode::Full,
           int PlannerFlag = FFTW_ESTIMATE>
 void oaconvolve_fftw(std::span<const T> input, std::span<const T> kernel,
                      std::span<T> output) {
diff --git a/include/fftconv/fftw.hpp b/include/fftconv/fftw.hpp
index 561d83b..c5bf3c8 100644
--- a/include/fftconv/fftw.hpp
+++ b/include/fftconv/fftw.hpp
@@ -7,8 +7,8 @@ A C++ FFTW wrapper
 #include <complex>
 #include <cstdlib>
 #include <fftw3.h>
-#include <span>
 #include <memory>
+#include <span>
 #include <type_traits>
 #include <unordered_map>
 
@@ -20,7 +20,8 @@ A C++ FFTW wrapper
 #include <arm_neon.h>
 #endif
 
-// NOLINTBEGIN(*-pointer-arithmetic, *-macro-usage, *-const-cast)
+// NOLINTBEGIN(*-pointer-arithmetic, *-macro-usage, *-const-cast,
+// *-math-missing-parenthesis)
 
 namespace fftw {
 
@@ -31,8 +32,10 @@ struct WisdomSetup {
   explicit WisdomSetup(bool threadSafe) {
     static bool callSetup = true;
     if (threadSafe && callSetup) {
+#if defined FFTW_HAVE_THREADS
       fftw_make_planner_thread_safe();
       fftwf_make_planner_thread_safe();
+#endif
       callSetup = false;
     }
     fftw_import_wisdom_from_filename(".fftw_wisdom");
@@ -625,7 +628,7 @@ Helper functions
 out[i] += in[i] * fct
  */
 template <typename T>
-inline void normalize_add(T *out, T *in, size_t len, T fct) {
+inline void normalize_add(T *out, const T *in, size_t len, T fct) {
   for (size_t i = 0; i < len; ++i) {
     out[i] += in[i] * fct;
   }
@@ -637,9 +640,7 @@ out[i] += in[i] * fct
 template <typename T>
 inline void normalize_add(std::span<T> out, std::span<const T> in, T fct) {
   const auto len = std::min(out.size(), in.size());
-  for (size_t i = 0; i < len; ++i) {
-    out[i] += in[i] * fct;
-  }
+  normalize_add<T>(out.data(), in.data(), len, fct); // reuse the pointer overload; len is already clamped to the shorter span
 }
 
 /**
@@ -925,4 +926,5 @@ void scale_imag_and_magnitude(T const *real, T const *imag, T fct, size_t n,
 
 } // namespace fftw
 
-// NOLINTEND(*-pointer-arithmetic, *-macro-usage, *-const-cast)
+// NOLINTEND(*-pointer-arithmetic, *-macro-usage, *-const-cast,
+// *-math-missing-parenthesis)
diff --git a/include/fftconv/hilbert.hpp b/include/fftconv/hilbert.hpp
index b688590..4c3b93d 100644
--- a/include/fftconv/hilbert.hpp
+++ b/include/fftconv/hilbert.hpp
@@ -5,7 +5,8 @@
 #include <fftconv/fftw.hpp>
 #include <span>
 
-// NOLINTBEGIN(*-pointer-arithmetic, *-magic-numbers)
+// NOLINTBEGIN(*-pointer-arithmetic, *-magic-numbers,
+// *-math-missing-parenthesis)
 
 namespace fftconv {
 
@@ -15,19 +16,19 @@ Uses FFTW's r2c transform
 */
 template <fftw::Floating T>
 void hilbert(const std::span<const T> x, const std::span<T> env) {
-  const auto n = x.size();
+  const size_t n = x.size();
   assert(n > 0);
   assert(x.size() == env.size());
 
   fftw::EngineR2C1D<T> &engine = fftw::EngineR2C1D<T>::get(n);
   fftw::R2CBuffer<T> &buf = engine.buf;
 
-  if (isSIMDAligned<64>(x.data())) {
+  if (isSIMDAligned<32>(x.data())) {
     // Avoid a copy
     engine.forward(x.data(), buf.out);
   } else {
     // Copy input to real buffer
-    for (int i = 0; i < n; ++i) {
+    for (size_t i = 0; i < n; ++i) {
       buf.in[i] = x[i];
     }
 
@@ -36,8 +37,8 @@ void hilbert(const std::span<const T> x, const std::span<T> env) {
   }
 
   // Multiply by 1j (skip DC and Nyquist)
-  const auto cx_size = n / 2 + 1;
-  for (auto i = 0; i < cx_size; ++i) {
+  const size_t cx_size = n / 2 + 1;
+  for (size_t i = 0; i < cx_size; ++i) {
     // Skip DC (0 Hz) and Nyquist (n/2 Hz when n is even)
     if (i == 0 || (n % 2 == 0 && i == cx_size - 1)) {
       buf.out[i][0] = 0.0;
@@ -56,7 +57,7 @@ void hilbert(const std::span<const T> x, const std::span<T> env) {
   // Take the abs of the analytic signal
   const T fct = static_cast<T>(1. / n);
 
-  for (auto i = 0; i < n; ++i) {
+  for (size_t i = 0; i < n; ++i) {
     const auto real = x[i];
     const auto imag = buf.in[i] * fct;
     env[i] = std::sqrt(real * real + imag * imag);
@@ -182,4 +183,4 @@ void hilbert(const std::span<const T> x, const std::span<T> env) {
 
 } // namespace fftconv
 
-// NOLINTEND(*-pointer-arithmetic, *-magic-numbers)
+// NOLINTEND(*-pointer-arithmetic, *-magic-numbers, *-math-missing-parenthesis)
diff --git a/meson.build b/meson.build
new file mode 100644
index 0000000..9ac664c
--- /dev/null
+++ b/meson.build
@@ -0,0 +1,13 @@
+project('fftconv', 'c', 'cpp',  # declares both C and C++ as project languages
+  version : '0.5.1',
+  default_options : ['cpp_std=c++20', 'warning_level=2'],  # the headers include <span>, which needs C++20
+)
+
+fftconv_inc = include_directories('include')  # header root used by test/, benchmark/ and py/
+
+fftw3_dep = dependency('fftw3')  # double-precision FFTW (fftw_* API)
+fftw3f_dep = dependency('fftw3f')  # single-precision FFTW (fftwf_* API)
+
+subdir('test')  # the subdirs below read fftconv_inc and the fftw deps, so they must come after those assignments
+subdir('benchmark')
+subdir('py')  # NOTE(review): unconditional, so Python and pybind11 are required even for a C++-only build - confirm intended
diff --git a/py/meson.build b/py/meson.build
new file mode 100644
index 0000000..515edc5
--- /dev/null
+++ b/py/meson.build
@@ -0,0 +1,18 @@
+py = import('python').find_installation(pure: false)  # pure: false makes install_sources() default to the platform-specific site-packages
+pybind11_dep = dependency('pybind11')
+
+py.extension_module(
+  '_pyfftconv',  # compiled module name; installed under the pyfftconv/ subdir set below
+  'main.cpp',
+  include_directories: fftconv_inc,
+  dependencies: [fftw3_dep, fftw3f_dep, pybind11_dep],
+  install: true,
+  subdir: 'pyfftconv',
+)
+
+py.install_sources(  # Python package file and .pyi stubs, installed into the same pyfftconv/ subdir
+  'pyfftconv/__init__.py',
+  'pyfftconv/__init__.pyi',
+  'pyfftconv/_pyfftconv.pyi',
+  subdir: 'pyfftconv',
+)
diff --git a/pyproject.toml b/pyproject.toml
index 0532171..baf736f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
-requires = ["scikit-build-core>=0.10", "pybind11"]
-build-backend = "scikit_build_core.build"
+requires = ["meson-python", "pybind11"]  # pybind11 is looked up by dependency('pybind11') in py/meson.build
+build-backend = "mesonpy"  # PEP 517 backend module provided by meson-python
 
 [project]
 name = "pyfftconv"
@@ -16,20 +16,3 @@ classifiers = [
 
 [project.urls]
 homepage = "https://github.com/kwsp/fftconv"
-
-[tool.scikit-build]
-minimum-version = "build-system.requires"
-logging.level = "INFO"
-
-cmake.version = ">=3.20"
-cmake.args = [
-    "--preset=clang",
-    "-GNinja",        # -GNinja overrides the "Ninja Multi-Config" defined in the preset
-]
-cmake.build-type = "Release"
-cmake.source-dir = "."
-
-build.verbose = true
-build.targets = ["_pyfftconv"]
-
-wheel.packages = ["py/pyfftconv"]
diff --git a/test/meson.build b/test/meson.build
new file mode 100644
index 0000000..27e0fcc
--- /dev/null
+++ b/test/meson.build
@@ -0,0 +1,20 @@
+armadillo_dep = dependency('armadillo')  # test_script.cpp compares results against arma::conv
+gtest_dep = dependency('gtest')  # NOTE(review): without main : true no gtest_main is linked - confirm the test sources define main()
+
+test_common_deps = [fftw3_dep, fftw3f_dep, armadillo_dep]  # dependencies shared by both executables below
+
+executable('test_script_fftconv',  # built but not registered with test(), so `meson test` does not run it
+  'test_script.cpp',
+  include_directories : fftconv_inc,
+  dependencies : test_common_deps,
+)
+
+test_fftconv_exe = executable('test_fftconv',  # single GoogleTest binary built from all three test sources
+  'test_fftw.cpp',
+  'test_fftconv.cpp',
+  'test_hilbert.cpp',
+  include_directories : fftconv_inc,
+  dependencies : test_common_deps + [gtest_dep],
+)
+
+test('fftconv', test_fftconv_exe, protocol : 'gtest')  # the only test registered in this file
diff --git a/test/test_fftconv.cpp b/test/test_fftconv.cpp
index b01164b..70fe579 100644
--- a/test/test_fftconv.cpp
+++ b/test/test_fftconv.cpp
@@ -3,7 +3,6 @@
 #include <array>
 #include <complex>
 #include <fftw3.h>
-#include <fmt/format.h>
 #include <gtest/gtest.h>
 #include <span>
 #include <vector>
@@ -186,13 +185,6 @@ TEST(Convolve, Same) {
   test_conv<float, mode>(fftconv::convolve_fftw<float, mode>);
 }
 
-TEST(Convolve, PlannerFlag) {
-  constexpr auto mode = ConvMode::Full;
-  using T = double;
-  test_conv<T, mode>(fftconv::convolve_fftw<T, mode, FFTW_ESTIMATE>);
-  test_conv<T, mode>(fftconv::convolve_fftw<T, mode, FFTW_PATIENT>);
-  test_conv<T, mode>(fftconv::convolve_fftw<T, mode, FFTW_EXHAUSTIVE>);
-}
 
 TEST(OAConvolve, Full) {
   constexpr auto mode = ConvMode::Full;
@@ -207,14 +199,5 @@ TEST(OAConvolve, Same) {
   test_oaconv<float, mode>(fftconv::oaconvolve_fftw<float, mode>);
 }
 
-TEST(OAConvolve, PlannerFlag) {
-  using T = double;
-  constexpr auto mode = ConvMode::Same;
-
-  test_oaconv<T, mode>(fftconv::oaconvolve_fftw<T, mode, FFTW_ESTIMATE>);
-  test_oaconv<T, mode>(fftconv::oaconvolve_fftw<T, mode, FFTW_MEASURE>);
-  test_oaconv<T, mode>(fftconv::oaconvolve_fftw<T, mode, FFTW_PATIENT>);
-  test_oaconv<T, mode>(fftconv::oaconvolve_fftw<T, mode, FFTW_EXHAUSTIVE>);
-}
 
 // NOLINTEND(*-magic-numbers,*-array-index)
diff --git a/test/test_fftw.cpp b/test/test_fftw.cpp
index 99469c7..bd47887 100644
--- a/test/test_fftw.cpp
+++ b/test/test_fftw.cpp
@@ -285,8 +285,6 @@ TEST_F(FFTWPlanCreateC2CSplit, GuruPlanSplitCorrect) {
       0.,          1.10366614,  0.83078287,  -2.11848431, 0.42226181,
       -1.67620125, 0.23602404,  -0.43823534, 0.35119795,  0.70254131};
 
-  alignas(32) std::array<T, 20> ri_{};
-  alignas(32) std::array<T, 20> ro_{};
 
   auto pf = fftw::Plan<T>::guru_split_dft(rank, &dim, howmany, &howmany_dim,
                                           ri.data(), ii.data(), ro.data(),
diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp
index 86ef93d..f7ed8c5 100644
--- a/test/test_helpers.hpp
+++ b/test/test_helpers.hpp
@@ -1,7 +1,8 @@
 #include <cassert>
+#include <chrono>
 #include <cstdlib>
-#include <fmt/core.h>
 #include <functional>
+#include <iostream>
 
 // Run the `callable` `n_runs` times and print the time.
 inline void timeit(const std::string &name,
@@ -13,5 +14,5 @@ inline void timeit(const std::string &name,
   }
   const auto elapsed =
       duration_cast<milliseconds>(high_resolution_clock::now() - start);
-  fmt::println("    ({} runs) {} took {}ms", n_runs, name, elapsed.count());
+  std::cout << "    (" << n_runs << " runs) " << name << " took " << elapsed.count() << "ms\n";
 }
\ No newline at end of file
diff --git a/test/test_script.cpp b/test/test_script.cpp
index b9b8117..d46691b 100644
--- a/test/test_script.cpp
+++ b/test/test_script.cpp
@@ -2,7 +2,6 @@
 #include <array>
 #include <cstdlib>
 #include <cstring>
-#include <fmt/format.h>
 #include <iostream>
 #include <span>
 
@@ -25,6 +24,14 @@ void bench(const arma::Col<T> &input, const arma::Col<T> &kernel) {
       [&]() { fftconv::oaconvolve_fftw<T>(input, kernel, output); }, N_RUNS);
 }
 
+// Absolute tolerance passed to arma::approx_equal in run_bench():
+// 1e-9 when T is double, 1e-5 for every other type (float in practice).
+template <typename T> T get_tol() {
+  constexpr bool is_double = std::is_same_v<T, double>;
+  return is_double ? static_cast<T>(1e-9)
+                   : static_cast<T>(1e-5F);
+}
+
 template <typename T> void run_bench() {
   constexpr std::array<std::array<size_t, 2>, 4> test_sizes{{
       {1664, 65},
@@ -33,18 +40,11 @@ template <typename T> void run_bench() {
       {4352, 65},
   }};
 
-  T tol{};
-  if constexpr (std::is_same_v<T, double>) {
-    tol = 1e-9;
-  } else {
-    tol = 1e-5f;
-  }
-
   for (const auto [size1, size2] : test_sizes) {
     arma::Col<T> input(size1, arma::fill::randn);
     arma::Col<T> kernel(size2, arma::fill::randn);
 
-    fmt::println("=== test case ({}, {}) ===", size1, size2);
+    std::cout << "=== test case (" << size1 << ", " << size2 << ") ===\n";
 
     arma::Col<T> expected_arma = arma::conv(input, kernel, "same");
     {
@@ -55,11 +55,11 @@ template <typename T> void run_bench() {
                                                  std::span<const T>(kernel),
                                                  std::span<T>(res));
 
-      const auto equal = arma::approx_equal(res, expected_arma, "absdiff", tol);
+      const auto equal = arma::approx_equal(res, expected_arma, "absdiff", get_tol<T>());
       if (!equal) {
-        fmt::println("Test failed.");
+        std::cout << "Test failed.\n";
       } else {
-        fmt::println("Test passed.");
+        std::cout << "Test passed.\n";
       }
     }