diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4e6812f..a0fef01 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,7 +22,7 @@ jobs: core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: true @@ -60,118 +60,41 @@ jobs: Build-mac: runs-on: macos-latest steps: - - name: Export GitHub Actions cache environment variables - uses: actions/github-script@v7 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: true - - name: Install system-wide build tools - shell: bash - # Install - # mono: NuGet requires a dotnet runtime - # ninja: Build system - # llvm: Just for clang-tidy. Need to add to path. - # Just add clang-tidy to path, not all of LLVM clang. - run: | - brew install mono ninja llvm - ln -s $(brew --prefix llvm)/bin/clang-tidy /usr/local/bin/clang-tidy - brew install autoconf autoconf-archive automake libtool + - name: Install dependencies + run: brew install meson ninja fftw armadillo googletest google-benchmark pybind11 - - name: Setup VCPKG - shell: bash - run: | - cd ${{ github.workspace }} - git clone https://github.com/microsoft/vcpkg - ${{ github.workspace }}/vcpkg/bootstrap-vcpkg.sh + - name: Meson setup + run: meson setup builddir --buildtype=release - - name: Add NuGet sources - shell: bash - env: - gh_packages_secret: ${{ secrets.GH_PACKAGES_TOKEN }} - if: ${{ env.gh_packages_secret != '' }} - run: | - mono `${{ env.VCPKG_EXE }} fetch nuget | tail -n 1` \ - sources add \ - -Source "${{ env.FEED_URL }}" \ - -StorePasswordInClearText \ - -Name GitHubPackages \ - -UserName "${{ env.USERNAME }}" \ - -Password "${{ secrets.GH_PACKAGES_TOKEN }}" - mono `${{ env.VCPKG_EXE }} fetch nuget | tail -n 1` \ - setapikey "${{ secrets.GH_PACKAGES_TOKEN }}" \ - -Source "${{ env.FEED_URL }}" + - name: Build + run: meson compile -C builddir - - name: CMake configure - shell: bash - run: cmake --preset clang - - - name: CMake build - shell: bash - run: cmake --build --preset clang-release - - - name: CTest - shell: bash - run: ctest --output-on-failure --test-dir build/clang/test/ + - name: Test + run: meson test -C builddir --print-errorlogs Build-linux: runs-on: ubuntu-24.04 steps: - - name: Export GitHub Actions cache environment variables - uses: actions/github-script@v7 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: true - - name: Install system dependencies - shell: bash + - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y mono-devel ninja-build pkg-config cmake clang clang-tidy - - - name: Setup VCPKG - shell: bash - run: | - cd ${{ github.workspace }} - git clone https://github.com/microsoft/vcpkg - ${{ github.workspace }}/vcpkg/bootstrap-vcpkg.sh + sudo apt-get install -y meson ninja-build pkg-config clang \ + libfftw3-dev libarmadillo-dev libgtest-dev libbenchmark-dev \ + pybind11-dev - - name: Add NuGet sources - shell: bash - env: - gh_packages_secret: ${{ secrets.GH_PACKAGES_TOKEN }} - if: ${{ env.gh_packages_secret != '' }} - run: | - # Use nuget via mono (since mono is not pre-installed on ubuntu, and vcpkg fetches the Windows nuget binary) - mono `${{ env.VCPKG_EXE }} fetch nuget | tail -n 1` \ - sources add \ - -Source "${{ env.FEED_URL }}" \ - -StorePasswordInClearText \ - -Name GitHubPackages \ - -UserName "${{ env.USERNAME }}" \ - -Password "${{ secrets.GH_PACKAGES_TOKEN }}" - mono `${{ env.VCPKG_EXE }} fetch nuget | tail -n 1` \ - setapikey "${{ secrets.GH_PACKAGES_TOKEN }}" \ - -Source "${{ env.FEED_URL }}" + - name: Meson setup + run: CC=clang CXX=clang++ meson setup builddir --buildtype=release - - name: CMake configure - shell: bash - run: cmake --preset clang - - - name: CMake build - shell: bash - run: cmake --build --preset clang-release + - name: Build + run: meson compile -C builddir - - name: CTest - shell: bash - run: ctest --output-on-failure --test-dir build/clang/test/ + - name: Test + run: meson test -C builddir --print-errorlogs diff --git a/.gitignore b/.gitignore index b7aed4d..2b07d51 100644 --- a/.gitignore +++ b/.gitignore @@ -13,7 +13,7 @@ __pycache__/ # Distribution / packaging .Python -build/ +build*/ develop-eggs/ dist/ downloads/ diff --git a/.vscode/settings.json b/.vscode/settings.json index 9e26dfe..b346d75 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1 +1,3 @@ -{} \ No newline at end of file +{ + "C_Cpp.default.configurationProvider": "mesonbuild.mesonbuild", +} \ No newline at end of file diff --git a/benchmark/meson.build b/benchmark/meson.build new file mode 100644 index 0000000..cee8511 --- /dev/null +++ b/benchmark/meson.build @@ -0,0 +1,9 @@ +benchmark_dep = dependency('benchmark') +armadillo_dep = dependency('armadillo') + +executable('bench_fftconv', + 'bench_fftconv.cpp', + 'bench_hilbert.cpp', + include_directories : fftconv_inc, + dependencies : [fftw3_dep, fftw3f_dep, benchmark_dep, armadillo_dep], +) diff --git a/include/fftconv/fftconv.hpp b/include/fftconv/fftconv.hpp index dc3e923..d32e4a4 100644 --- a/include/fftconv/fftconv.hpp +++ b/include/fftconv/fftconv.hpp @@ -8,10 +8,8 @@ #include #include #include -#include #include #include -#include // NOLINTBEGIN(*-reinterpret-cast, *-const-cast, *-pointer-arithmetic) @@ -386,13 +384,12 @@ struct FFTConvEngine : public fftw::cache_mixin> { const size_t fft_size = buf.real.size(); const size_t step_size = fft_size - (k.size() - 1); + const T fct = static_cast(1. / fft_size); // forward fft of kernel and save to complex2 internal::copy_to_padded_buffer(k, buf.real); forward.execute_dft_r2c(buf.real_ptr(), buf.cx2_ptr()); - const auto fct = static_cast(1. / fft_size); - if constexpr (Mode == ConvMode::Full) { assert(a.size() + k.size() - 1 == out.size()); @@ -450,7 +447,7 @@ struct FFTConvEngine : public fftw::cache_mixin> { // * Cache fftw_plan // * Reuse buffers (no malloc on second call to the same convolution size) // https://en.wikipedia.org/w/index.php?title=Convolution#Fast_convolution_algorithms -template void convolve_fftw(const std::span input, const std::span kernel, std::span output) { @@ -471,7 +468,7 @@ For "Same" mode, output_size == input_size 2. convolve with kernel using fft of length N. 3. add blocks together */ -template void oaconvolve_fftw(std::span input, std::span kernel, std::span output) { diff --git a/include/fftconv/fftw.hpp b/include/fftconv/fftw.hpp index 561d83b..c5bf3c8 100644 --- a/include/fftconv/fftw.hpp +++ b/include/fftconv/fftw.hpp @@ -7,8 +7,8 @@ A C++ FFTW wrapper #include #include #include -#include #include +#include #include #include @@ -20,7 +20,8 @@ A C++ FFTW wrapper #include #endif -// NOLINTBEGIN(*-pointer-arithmetic, *-macro-usage, *-const-cast) +// NOLINTBEGIN(*-pointer-arithmetic, *-macro-usage, *-const-cast, +// *-math-missing-parenthesis) namespace fftw { @@ -31,8 +32,10 @@ struct WisdomSetup { explicit WisdomSetup(bool threadSafe) { static bool callSetup = true; if (threadSafe && callSetup) { +#if defined FFTW_HAVE_THREADS fftw_make_planner_thread_safe(); fftwf_make_planner_thread_safe(); +#endif callSetup = false; } fftw_import_wisdom_from_filename(".fftw_wisdom"); @@ -625,7 +628,7 @@ Helper functions out[i] += in[i] * fct */ template -inline void normalize_add(T *out, T *in, size_t len, T fct) { +inline void normalize_add(T *out, const T *in, size_t len, T fct) { for (size_t i = 0; i < len; ++i) { out[i] += in[i] * fct; } @@ -637,9 +640,7 @@ out[i] += in[i] * fct template inline void normalize_add(std::span out, std::span in, T fct) { const auto len = std::min(out.size(), in.size()); - for (size_t i = 0; i < len; ++i) { - out[i] += in[i] * fct; - } + normalize_add(out.data(), in.data(), len, fct); } /** @@ -925,4 +926,5 @@ void scale_imag_and_magnitude(T const *real, T const *imag, T fct, size_t n, } // namespace fftw -// NOLINTEND(*-pointer-arithmetic, *-macro-usage, *-const-cast) +// NOLINTEND(*-pointer-arithmetic, *-macro-usage, *-const-cast, +// *-math-missing-parenthesis) diff --git a/include/fftconv/hilbert.hpp b/include/fftconv/hilbert.hpp index b688590..4c3b93d 100644 --- a/include/fftconv/hilbert.hpp +++ b/include/fftconv/hilbert.hpp @@ -5,7 +5,8 @@ #include #include -// NOLINTBEGIN(*-pointer-arithmetic, *-magic-numbers) +// NOLINTBEGIN(*-pointer-arithmetic, *-magic-numbers, +// *-math-missing-parenthesis) namespace fftconv { @@ -15,19 +16,19 @@ Uses FFTW's r2c transform */ template void hilbert(const std::span x, const std::span env) { - const auto n = x.size(); + const size_t n = x.size(); assert(n > 0); assert(x.size() == env.size()); fftw::EngineR2C1D &engine = fftw::EngineR2C1D::get(n); fftw::R2CBuffer &buf = engine.buf; - if (isSIMDAligned<64>(x.data())) { + if (isSIMDAligned<32>(x.data())) { // Avoid a copy engine.forward(x.data(), buf.out); } else { // Copy input to real buffer - for (int i = 0; i < n; ++i) { + for (size_t i = 0; i < n; ++i) { buf.in[i] = x[i]; } @@ -36,8 +37,8 @@ void hilbert(const std::span x, const std::span env) { } // Multiply by 1j (skip DC and Nyquist) - const auto cx_size = n / 2 + 1; - for (auto i = 0; i < cx_size; ++i) { + const size_t cx_size = n / 2 + 1; + for (size_t i = 0; i < cx_size; ++i) { // Skip DC (0 Hz) and Nyquist (n/2 Hz when n is even) if (i == 0 || (n % 2 == 0 && i == cx_size - 1)) { buf.out[i][0] = 0.0; @@ -56,7 +57,7 @@ void hilbert(const std::span x, const std::span env) { // Take the abs of the analytic signal const T fct = static_cast(1. / n); - for (auto i = 0; i < n; ++i) { + for (size_t i = 0; i < n; ++i) { const auto real = x[i]; const auto imag = buf.in[i] * fct; env[i] = std::sqrt(real * real + imag * imag); @@ -182,4 +183,4 @@ void hilbert(const std::span x, const std::span env) { } // namespace fftconv -// NOLINTEND(*-pointer-arithmetic, *-magic-numbers) +// NOLINTEND(*-pointer-arithmetic, *-magic-numbers, *-math-missing-parenthesis) diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..9ac664c --- /dev/null +++ b/meson.build @@ -0,0 +1,13 @@ +project('fftconv', 'c', 'cpp', + version : '0.5.1', + default_options : ['cpp_std=c++20', 'warning_level=2'], +) + +fftconv_inc = include_directories('include') + +fftw3_dep = dependency('fftw3') +fftw3f_dep = dependency('fftw3f') + +subdir('test') +subdir('benchmark') +subdir('py') diff --git a/py/meson.build b/py/meson.build new file mode 100644 index 0000000..515edc5 --- /dev/null +++ b/py/meson.build @@ -0,0 +1,18 @@ +py = import('python').find_installation(pure: false) +pybind11_dep = dependency('pybind11') + +py.extension_module( + '_pyfftconv', + 'main.cpp', + include_directories: fftconv_inc, + dependencies: [fftw3_dep, fftw3f_dep, pybind11_dep], + install: true, + subdir: 'pyfftconv', +) + +py.install_sources( + 'pyfftconv/__init__.py', + 'pyfftconv/__init__.pyi', + 'pyfftconv/_pyfftconv.pyi', + subdir: 'pyfftconv', +) diff --git a/pyproject.toml b/pyproject.toml index 0532171..baf736f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["scikit-build-core>=0.10", "pybind11"] -build-backend = "scikit_build_core.build" +requires = ["meson-python", "pybind11"] +build-backend = "mesonpy" [project] name = "pyfftconv" @@ -16,20 +16,3 @@ classifiers = [ [project.urls] homepage = "https://github.com/kwsp/fftconv" - -[tool.scikit-build] -minimum-version = "build-system.requires" -logging.level = "INFO" - -cmake.version = ">=3.20" -cmake.args = [ - "--preset=clang", - "-GNinja", # -GNinja overrides the "Ninja Multi-Config" defined in the preset -] -cmake.build-type = "Release" -cmake.source-dir = "." - -build.verbose = true -build.targets = ["_pyfftconv"] - -wheel.packages = ["py/pyfftconv"] diff --git a/test/meson.build b/test/meson.build new file mode 100644 index 0000000..27e0fcc --- /dev/null +++ b/test/meson.build @@ -0,0 +1,20 @@ +armadillo_dep = dependency('armadillo') +gtest_dep = dependency('gtest') + +test_common_deps = [fftw3_dep, fftw3f_dep, armadillo_dep] + +executable('test_script_fftconv', + 'test_script.cpp', + include_directories : fftconv_inc, + dependencies : test_common_deps, +) + +test_fftconv_exe = executable('test_fftconv', + 'test_fftw.cpp', + 'test_fftconv.cpp', + 'test_hilbert.cpp', + include_directories : fftconv_inc, + dependencies : test_common_deps + [gtest_dep], +) + +test('fftconv', test_fftconv_exe, protocol : 'gtest') diff --git a/test/test_fftconv.cpp b/test/test_fftconv.cpp index b01164b..70fe579 100644 --- a/test/test_fftconv.cpp +++ b/test/test_fftconv.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -186,13 +185,6 @@ TEST(Convolve, Same) { test_conv(fftconv::convolve_fftw); } -TEST(Convolve, PlannerFlag) { - constexpr auto mode = ConvMode::Full; - using T = double; - test_conv(fftconv::convolve_fftw); - test_conv(fftconv::convolve_fftw); - test_conv(fftconv::convolve_fftw); -} TEST(OAConvolve, Full) { constexpr auto mode = ConvMode::Full; @@ -207,14 +199,5 @@ TEST(OAConvolve, Same) { test_oaconv(fftconv::oaconvolve_fftw); } -TEST(OAConvolve, PlannerFlag) { - using T = double; - constexpr auto mode = ConvMode::Same; - - test_oaconv(fftconv::oaconvolve_fftw); - test_oaconv(fftconv::oaconvolve_fftw); - test_oaconv(fftconv::oaconvolve_fftw); - test_oaconv(fftconv::oaconvolve_fftw); -} // NOLINTEND(*-magic-numbers,*-array-index) diff --git a/test/test_fftw.cpp b/test/test_fftw.cpp index 99469c7..bd47887 100644 --- a/test/test_fftw.cpp +++ b/test/test_fftw.cpp @@ -285,8 +285,6 @@ TEST_F(FFTWPlanCreateC2CSplit, GuruPlanSplitCorrect) { 0., 1.10366614, 0.83078287, -2.11848431, 0.42226181, -1.67620125, 0.23602404, -0.43823534, 0.35119795, 0.70254131}; - alignas(32) std::array ri_{}; - alignas(32) std::array ro_{}; auto pf = fftw::Plan::guru_split_dft(rank, &dim, howmany, &howmany_dim, ri.data(), ii.data(), ro.data(), diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index 86ef93d..f7ed8c5 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -1,7 +1,8 @@ #include +#include #include -#include #include +#include // Run the `callable` `n_runs` times and print the time. inline void timeit(const std::string &name, @@ -13,5 +14,5 @@ inline void timeit(const std::string &name, } const auto elapsed = duration_cast(high_resolution_clock::now() - start); - fmt::println(" ({} runs) {} took {}ms", n_runs, name, elapsed.count()); + std::cout << " (" << n_runs << " runs) " << name << " took " << elapsed.count() << "ms\n"; } \ No newline at end of file diff --git a/test/test_script.cpp b/test/test_script.cpp index b9b8117..d46691b 100644 --- a/test/test_script.cpp +++ b/test/test_script.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include @@ -25,6 +24,14 @@ void bench(const arma::Col &input, const arma::Col &kernel) { [&]() { fftconv::oaconvolve_fftw(input, kernel, output); }, N_RUNS); } +template T get_tol() { + if constexpr (std::is_same_v) { + return 1e-9; + } else { + return 1e-5F; + } +} + template void run_bench() { constexpr std::array, 4> test_sizes{{ {1664, 65}, @@ -33,18 +40,11 @@ template void run_bench() { {4352, 65}, }}; - T tol{}; - if constexpr (std::is_same_v) { - tol = 1e-9; - } else { - tol = 1e-5f; - } - for (const auto [size1, size2] : test_sizes) { arma::Col input(size1, arma::fill::randn); arma::Col kernel(size2, arma::fill::randn); - fmt::println("=== test case ({}, {}) ===", size1, size2); + std::cout << "=== test case (" << size1 << ", " << size2 << ") ===\n"; arma::Col expected_arma = arma::conv(input, kernel, "same"); { @@ -55,11 +55,11 @@ template void run_bench() { std::span(kernel), std::span(res)); - const auto equal = arma::approx_equal(res, expected_arma, "absdiff", tol); + const auto equal = arma::approx_equal(res, expected_arma, "absdiff", get_tol()); if (!equal) { - fmt::println("Test failed."); + std::cout << "Test failed.\n"; } else { - fmt::println("Test passed."); + std::cout << "Test passed.\n"; } }