From 29554fecaf44575dbbfa3c6c7b165d256a0d9624 Mon Sep 17 00:00:00 2001 From: Vincent Neiger Date: Sat, 11 Apr 2026 08:57:53 +0200 Subject: [PATCH 1/8] add 'make profile' to CI --- .github/workflows/CI.yml | 223 +++++---------------------------------- 1 file changed, 26 insertions(+), 197 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index a80071ea..0eb3d33c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -83,6 +83,11 @@ jobs: cd flint-extras $MAKE tests + - name: "Compile profile" + run: | + cd flint-extras + $MAKE profile + - name: "Check" run: | cd flint-extras @@ -124,7 +129,7 @@ jobs: libtool --version echo "MAKE=make -j$(expr $(nproc) + 1) --output-sync=target" >> $GITHUB_ENV - # install FLINT dev version + # install FLINT 3.4.0 version git clone --depth=1 https://github.com/flintlib/flint.git --branch v3.4.0 --single-branch cd flint ./bootstrap.sh @@ -151,6 +156,11 @@ jobs: cd flint-extras $MAKE tests + - name: "Compile profile" + run: | + cd flint-extras + $MAKE profile + - name: "Check" run: | cd flint-extras @@ -220,6 +230,11 @@ jobs: cd flint-extras $MAKE tests + - name: "Compile profile" + run: | + cd flint-extras + $MAKE profile + - name: "Check" run: | cd flint-extras @@ -283,6 +298,11 @@ jobs: cd flint-extras $MAKE tests + - name: "Compile profile" + run: | + cd flint-extras + $MAKE profile + - name: "Check" run: | cd flint-extras @@ -355,204 +375,13 @@ jobs: cd flint-extras $MAKE tests + - name: "Compile profile" + run: | + cd flint-extras + $MAKE profile + - name: "Check" run: | cd flint-extras $MAKE check - - ############################################################################# - # mingw with gcc - ############################################################################# - #mingw64-gcc: - # name: MinGW GCC (x0.5) - - # runs-on: windows-latest - - # defaults: - # run: - # shell: msys2 {0} - - # env: - # CC: "gcc" - # FLINT_TEST_MULTIPLIER: "0.5" - - # steps: - # - uses: 
actions/checkout@v6 - - # - name: "Setup MinGW" - # uses: msys2/setup-msys2@v2 - # with: - # msystem: mingw64 - # update: true - # install: bc mingw-w64-x86_64-gcc mingw-w64-x86_64-autotools - - # - name: "Rescale multiplier" - # run: | - # FLINT_TEST_MULTIPLIER=$(echo "${FLINT_TEST_MULTIPLIER} * ${GLOBAL_MULTIPLIER}" | bc) - # echo "FLINT_TEST_MULTIPLIER=${FLINT_TEST_MULTIPLIER}" - # echo "FLINT_TEST_MULTIPLIER=${FLINT_TEST_MULTIPLIER}" >> $GITHUB_ENV - - # - name: "Setup" - # run: | - # gcc --version - # make --version - # autoconf --version - # libtool --version - # echo "MAKE=make -j$(expr $(nproc) + 1) --output-sync=target" >> $GITHUB_ENV - - # - name: "Configure" - # run: | - # ./bootstrap.sh - # ./configure \ - # CC=${CC} \ - # --disable-debug - - # - name: "Compile library" - # run: | - # ${MAKE} - - # - name: "Compile tests" - # run: | - # ${MAKE} tests - - # - name: "Check" - # run: | - # ${MAKE} check - - - - ############################################################################## - # msvc - ############################################################################## - #msvc: - # name: MSVC (x1) - - # runs-on: windows-latest - # env: - # FLINT_TEST_MULTIPLIER: 1 - # TIMEOUT: 150 - - # steps: - # - name: "Rescale multiplier (powershell)" - # run: | - # $FLINT_TEST_MULTIPLIER = $env:FLINT_TEST_MULTIPLIER * $env:GLOBAL_MULTIPLIER - # echo "FLINT_TEST_MULTIPLIER=$FLINT_TEST_MULTIPLIER | Out-File -Append -FilePath $env:GITHUB_ENV" - # $TIMEOUT = $env:TIMEOUT * $env:GLOBAL_MULTIPLIER - # echo "TIMEOUT=$TIMEOUT | Out-File -Append -FilePath $env:GITHUB_ENV" - # shell: powershell - - # - uses: actions/checkout@v6 - - # - name: "Setup cache for dependencies" - # uses: actions/github-script@v7 - # with: - # script: | - # core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - # core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - # - name: "Install dependencies" - # run: | - # vcpkg install gmp 
mpfr pthreads --binarysource="clear;x-gha,readwrite" - - # - name: "Setup MSVC" - # uses: ilammy/msvc-dev-cmd@v1.13.0 - # with: - # arch: x86_64 - - # - name: "Configure" - # run: | - # mkdir build - # cd build - # # For single build, we need atomics - # cmake ` - # -G "Ninja" ` - # -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake ` - # -DCMAKE_C_FLAGS="/wd4018 /wd4146 /wd4244 /wd4267 /wd4305 /wd4996" ` - # -DBUILD_TESTING=ON ` - # -DCMAKE_BUILD_TYPE=Release ` - # .. - - # - name: "Build" - # run: | - # cd build - # # NOTE: Number of threads for Github's CI runners are 4. - # cmake --build . -j5 - - # - name: "Check" - # run: | - # cd build - # set "FLINT_TEST_MULTIPLIER=$env:FLINT_TEST_MULTIPLIER" - # ctest -j5 --output-on-failure --timeout $env:TIMEOUT - # shell: powershell - - - - ############################################################################## - # alpine linux, musl, 32-bit (assert) - ############################################################################## - #alpine-32bit: - # name: Alpine Linux, musl, 32-bit (assert, x1.5) - - # runs-on: ubuntu-24.04 - - # env: - # CC: "gcc" - # FLINT_TEST_MULTIPLIER: "1.5" - - # steps: - # - name: "Rescale multiplier" - # run: | - # FLINT_TEST_MULTIPLIER=$(echo "${FLINT_TEST_MULTIPLIER} * ${GLOBAL_MULTIPLIER}" | bc) - # echo "FLINT_TEST_MULTIPLIER=${FLINT_TEST_MULTIPLIER}" - # echo "FLINT_TEST_MULTIPLIER=${FLINT_TEST_MULTIPLIER}" >> $GITHUB_ENV - - # - uses: actions/checkout@v6 - - # - name: "Setup latest Alpine Linux" - # uses: jirutka/setup-alpine@v1 - # with: - # arch: x86 - # branch: edge - # packages: > - # gmp-dev - # mpfr-dev - # gcc - # musl-dev - # make - # autoconf - # automake - # libtool - - # - name: "Setup" - # run: | - # gcc --version - # make --version - # autoconf --version - # libtool --version - # echo "MAKE=make -j$(expr $(nproc) + 1) --output-sync=target" >> $GITHUB_ENV - # shell: alpine.sh {0} - - # - name: "Configure" - # run: | - # ./bootstrap.sh - # ./configure \ - 
# CC=${CC} \ - # --enable-assert \ - # --disable-debug - # shell: alpine.sh {0} - - # - name: "Compile library" - # run: | - # $MAKE - # shell: alpine.sh {0} - - # - name: "Compile tests" - # run: | - # $MAKE tests - # shell: alpine.sh {0} - - # - name: "Check" - # run: | - # $MAKE check - # shell: alpine.sh {0} From d750687f1cd06abf04a96262f0f9657bf2a28eff Mon Sep 17 00:00:00 2001 From: Vincent Neiger Date: Sat, 11 Apr 2026 09:32:31 +0200 Subject: [PATCH 2/8] only check 'make profile' for flint-dev --- .github/workflows/CI.yml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 0eb3d33c..4dbe62d2 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -156,11 +156,6 @@ jobs: cd flint-extras $MAKE tests - - name: "Compile profile" - run: | - cd flint-extras - $MAKE profile - - name: "Check" run: | cd flint-extras @@ -230,11 +225,6 @@ jobs: cd flint-extras $MAKE tests - - name: "Compile profile" - run: | - cd flint-extras - $MAKE profile - - name: "Check" run: | cd flint-extras @@ -298,11 +288,6 @@ jobs: cd flint-extras $MAKE tests - - name: "Compile profile" - run: | - cd flint-extras - $MAKE profile - - name: "Check" run: | cd flint-extras From f30ad217815ab548b02c57b7127176a7daf5616c Mon Sep 17 00:00:00 2001 From: Vincent Neiger Date: Sat, 11 Apr 2026 09:34:54 +0200 Subject: [PATCH 3/8] add note in README about dev version if running make profile --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ea361f80..42a89b9c 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,9 @@ matrices, structured matrices, and their applications. Version 0.5 Warning: the FLINT-based part of PML (flint-extras folder) is work in progress. -Currently, it requires FLINT version 3.4.0 (November 2025) or later. +Currently, it requires FLINT version 3.4.0 (November 2025) or later. 
Compiling +benchmark files with `make profile` might further require the current +development version of FLINT. ## Authors From e658a117a03f78a27c2840465af37e8d2dcea539 Mon Sep 17 00:00:00 2001 From: Vincent Neiger Date: Sat, 11 Apr 2026 09:41:44 +0200 Subject: [PATCH 4/8] fixes some compilation warnings --- .../src/nmod_mat_extra/profile/p-mul_avx.c | 2 +- .../profile/time_nmod_mat_vec_mul.c | 183 ------------------ .../profile/time_nmod_vec_dot_product_multi.c | 4 +- 3 files changed, 2 insertions(+), 187 deletions(-) delete mode 100644 flint-extras/src/nmod_mat_extra/profile/time_nmod_mat_vec_mul.c diff --git a/flint-extras/src/nmod_mat_extra/profile/p-mul_avx.c b/flint-extras/src/nmod_mat_extra/profile/p-mul_avx.c index fd54f3ad..2dd11ecb 100644 --- a/flint-extras/src/nmod_mat_extra/profile/p-mul_avx.c +++ b/flint-extras/src/nmod_mat_extra/profile/p-mul_avx.c @@ -131,7 +131,7 @@ void time_nmod_mat_mul(ulong len, ulong nbits, ulong n, flint_rand_t state) /*--------------------------------------------------------------*/ /* main calls time */ /*--------------------------------------------------------------*/ -int main() +int main(int FLINT_UNUSED(argc), char ** FLINT_UNUSED(argv)) { flint_rand_t state; flint_rand_init(state); diff --git a/flint-extras/src/nmod_mat_extra/profile/time_nmod_mat_vec_mul.c b/flint-extras/src/nmod_mat_extra/profile/time_nmod_mat_vec_mul.c deleted file mode 100644 index fed33ecd..00000000 --- a/flint-extras/src/nmod_mat_extra/profile/time_nmod_mat_vec_mul.c +++ /dev/null @@ -1,183 +0,0 @@ -#include -#include -#include -#include - -#include "nmod_mat_extra.h" -#include "nmod_vec_extra.h" - -/*--------------------------------------------------------------*/ -/* computes a matrix-vector product in size r x c modulo n */ -/*--------------------------------------------------------------*/ -void time_nmod_mat_vec_mul(slong r, slong c, ulong n) -{ - flint_rand_t state; - flint_rand_init(state); - - nmod_t mod; - nmod_init(&mod, n); - - double 
t; - clock_t tt; - long nb_iter; - - nmod_mat_t A; - nmod_mat_init(A, r, c, mod.n); - nmod_mat_rand(A, state); - - ulong * u = flint_malloc(c * sizeof(ulong)); - ulong * v = flint_malloc(r * sizeof(ulong)); - nmod_mat_t umat; nmod_mat_init(umat, c, 1, n); - nmod_mat_t vmat1; nmod_mat_init(vmat1, r, 1, n); - nmod_mat_t vmat2; nmod_mat_init(vmat2, 1, r, n); - - t = 0.0; - nb_iter = 0; - while (t < 0.5) - { - _nmod_vec_rand(u, state, c, mod); - tt = clock(); - nmod_mat_mul_nmod_vec(v, A, u, c); - nmod_mat_mul_nmod_vec(v, A, u, c); - nmod_mat_mul_nmod_vec(v, A, u, c); - nmod_mat_mul_nmod_vec(v, A, u, c); - nmod_mat_mul_nmod_vec(v, A, u, c); - t += (double)(clock()-tt) / CLOCKS_PER_SEC; - nb_iter += 5; - } - t /= nb_iter; - printf("%4g\t", t); - - t = 0.0; - nb_iter = 0; - while (t < 0.5) - { - nmod_mat_rand(umat, state); - tt = clock(); - nmod_mat_mul(vmat1, A, umat); - nmod_mat_mul(vmat1, A, umat); - nmod_mat_mul(vmat1, A, umat); - nmod_mat_mul(vmat1, A, umat); - nmod_mat_mul(vmat1, A, umat); - t += (double)(clock()-tt) / CLOCKS_PER_SEC; - nb_iter += 5; - } - t /= nb_iter; - printf("%4g\t", t); - - //t = 0.0; - //nb_iter = 0; - //while (t < 0.5) - //{ - // nmod_mat_rand(umat, state); - // tt = clock(); - // nmod_mat_mul_blas(vmat1, A, umat); - // nmod_mat_mul_blas(vmat1, A, umat); - // nmod_mat_mul_blas(vmat1, A, umat); - // nmod_mat_mul_blas(vmat1, A, umat); - // nmod_mat_mul_blas(vmat1, A, umat); - // t += (double)(clock()-tt) / CLOCKS_PER_SEC; - // nb_iter += 5; - //} - //t /= nb_iter; - //printf("%4g\t", t); - - t = 0.0; - nb_iter = 0; - while (t < 0.5) - { - _nmod_vec_rand(v, state, c, mod); - tt = clock(); - nmod_mat_nmod_vec_mul(u, v, r, A); - nmod_mat_nmod_vec_mul(u, v, r, A); - nmod_mat_nmod_vec_mul(u, v, r, A); - nmod_mat_nmod_vec_mul(u, v, r, A); - nmod_mat_nmod_vec_mul(u, v, r, A); - t += (double)(clock()-tt) / CLOCKS_PER_SEC; - nb_iter += 5; - } - t /= nb_iter; - printf("%4g\t", t); - - t = 0.0; - nb_iter = 0; - while (t < 0.5) - { - nmod_mat_rand(vmat2, 
state); - tt = clock(); - nmod_mat_mul_classical(umat, vmat2, A); - nmod_mat_mul_classical(umat, vmat2, A); - nmod_mat_mul_classical(umat, vmat2, A); - nmod_mat_mul_classical(umat, vmat2, A); - nmod_mat_mul_classical(umat, vmat2, A); - t += (double)(clock()-tt) / CLOCKS_PER_SEC; - nb_iter += 5; - } - t /= nb_iter; - printf("%4g\t", t); - - //t = 0.0; - //nb_iter = 0; - //while (t < 0.5) - //{ - // nmod_mat_rand(vmat2, state); - // tt = clock(); - // nmod_mat_mul_blas(umat, vmat2, A); - // nmod_mat_mul_blas(umat, vmat2, A); - // nmod_mat_mul_blas(umat, vmat2, A); - // nmod_mat_mul_blas(umat, vmat2, A); - // nmod_mat_mul_blas(umat, vmat2, A); - // t += (double)(clock()-tt) / CLOCKS_PER_SEC; - // nb_iter += 5; - //} - //t /= nb_iter; - //printf("%4g\t", t); - - t = 0.0; - nb_iter = 0; - while (t < 0.5) - { - _nmod_vec_rand(u, state, c, mod); - tt = clock(); - nmod_mat_mul_nmod_vec_newdot(v, A, u, c); - nmod_mat_mul_nmod_vec_newdot(v, A, u, c); - nmod_mat_mul_nmod_vec_newdot(v, A, u, c); - nmod_mat_mul_nmod_vec_newdot(v, A, u, c); - nmod_mat_mul_nmod_vec_newdot(v, A, u, c); - t += (double)(clock()-tt) / CLOCKS_PER_SEC; - nb_iter += 5; - } - t /= nb_iter; - printf("%4g\t", t); - - printf("\n"); - - nmod_mat_clear(A); - nmod_mat_clear(umat); - nmod_mat_clear(vmat1); - nmod_mat_clear(vmat2); - flint_free(u); - flint_free(v); - flint_rand_clear(state); -} - -/*--------------------------------------------------------------*/ -/* main calls time */ -/*--------------------------------------------------------------*/ -int main() -{ - //printf("nbits\tr\tc\tmul_nmod_vec\tmatrix mul\tmatrix mul_blas\tnmod_vec_mul\tmatrix mul\tmatrix mul_blas\n"); - printf("nbits\tr\tc\tmul_nmod_vec\tmatrix mul\tnmod_vec_mul\tmatrix mul\tnmod_vec_newdot\n"); - for (slong r = 4; r < 200; r += 10) - { - printf("%d\t%ld\t%ld\t", 32, r, r); - time_nmod_mat_vec_mul(r, r, (1L << 32) - 1); - } - for (slong r = 1; r < 200; r += 5) - { - printf("%d\t%ld\t%ld\t", 61, r, r); - time_nmod_mat_vec_mul(r, r, 
(1L << 60) + 1); - } - - return 0; -} diff --git a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product_multi.c b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product_multi.c index a9f5c44e..a47bccb4 100644 --- a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product_multi.c +++ b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product_multi.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -457,6 +458,3 @@ int main(int argc, char ** argv) flint_rand_clear(state); return 0; } - -/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s From 6dc0b8f4462353fe029888c4b475faad407d8216 Mon Sep 17 00:00:00 2001 From: Vincent Neiger Date: Sat, 11 Apr 2026 09:45:10 +0200 Subject: [PATCH 5/8] fixes some compilation warnings --- flint-extras/src/nmod32_vec/profile/p-dot_mdot.c | 4 ++-- .../src/nmod_vec_extra/profile/time_nmod_vec_2dot2_split.c | 6 +++--- .../src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c | 3 --- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/flint-extras/src/nmod32_vec/profile/p-dot_mdot.c b/flint-extras/src/nmod32_vec/profile/p-dot_mdot.c index 36981736..5fc4f6dd 100644 --- a/flint-extras/src/nmod32_vec/profile/p-dot_mdot.c +++ b/flint-extras/src/nmod32_vec/profile/p-dot_mdot.c @@ -192,8 +192,8 @@ void sample_dot_msolve_avx2(void * arg, ulong count) FLINT_TEST_CLEAR(state); } #else /* PML_HAVE_AVX2 */ -TIME_VOID_DOT(dot_msolve_avx2, pow2_precomp); -SAMPLE_VOID_DOT(dot_msolve_avx2, pow2_precomp); +TIME_VOID_DOT(dot_msolve_avx2, pow2_precomp) +SAMPLE_VOID_DOT(dot_msolve_avx2, pow2_precomp) #endif /* PML_HAVE_AVX2 */ /*-----------------------*/ diff --git a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_2dot2_split.c b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_2dot2_split.c index 74a438f8..6db94ce1 100644 --- a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_2dot2_split.c +++ 
b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_2dot2_split.c @@ -13,8 +13,8 @@ void time_nmod_vec_2dot2_split(ulong len, ulong n, flint_rand_t state) nmod_t mod; nmod_init(&mod, n); - nn_ptr v11 = aligned_alloc(32, (4 + ((len >> 2) << 2)) * sizeof(ulong)); - nn_ptr v12 = aligned_alloc(32, (4 + ((len >> 2) << 2)) * sizeof(ulong)); + nn_ptr v11 = flint_aligned_alloc(32, (4 + ((len >> 2) << 2)) * sizeof(ulong)); + nn_ptr v12 = flint_aligned_alloc(32, (4 + ((len >> 2) << 2)) * sizeof(ulong)); nn_ptr v2 = _nmod_vec_init(len); const dot_params_t params = _nmod_vec_dot_params(len, mod); @@ -75,7 +75,7 @@ void time_nmod_vec_2dot2_split(ulong len, ulong n, flint_rand_t state) /*--------------------------------------------------------------*/ /* main calls time */ /*--------------------------------------------------------------*/ -int main() +int main(int FLINT_UNUSED(argc), char ** FLINT_UNUSED(argv)) { flint_rand_t state; flint_rand_init(state); diff --git a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c index 849cb762..64e958a7 100644 --- a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c +++ b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c @@ -658,6 +658,3 @@ int main(int argc, char ** argv) flint_rand_clear(state); return 0; } - -/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s From 442fa0b79cb123e04100e780b6f423652e607acf Mon Sep 17 00:00:00 2001 From: Vincent Neiger Date: Sat, 11 Apr 2026 09:46:29 +0200 Subject: [PATCH 6/8] fixes some compilation warnings --- .../src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c index 
64e958a7..b97aecef 100644 --- a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c +++ b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -56,7 +57,6 @@ ulong time_nmod_vec_dot_product_flint_cf(ulong len, ulong n, flint_rand_t state) return res; } - ulong time_nmod_vec_dot_product_flint_cu(ulong len, ulong n, flint_rand_t state) { nmod_t mod; From e996762233bfe4be3444d865c86f4cef6c0e2f07 Mon Sep 17 00:00:00 2001 From: Vincent Neiger Date: Sat, 11 Apr 2026 10:09:45 +0200 Subject: [PATCH 7/8] support non avx2 arch --- .../profile/time_nmod_vec_dot_product.c | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c index b97aecef..dc105667 100644 --- a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c +++ b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c @@ -127,7 +127,11 @@ ulong time_nmod_vec_dot_product_split26_cu(ulong len, ulong n, flint_rand_t stat { // TEST const dot_params_t params = _nmod_vec_dot_params(len, mod); +#ifdef HAVE_AVX2 ulong res_split = _nmod_vec_dot_product_split26_avx(v1[0], v2[0], len, mod); +#else + ulong res_split = _nmod_vec_dot_product_split26(v1[0], v2[0], len, mod); +#endif ulong res_correct = _nmod_vec_dot(v1[0], v2[0], len, mod, params); if (res_split != res_correct) { @@ -144,11 +148,19 @@ ulong time_nmod_vec_dot_product_split26_cu(ulong len, ulong n, flint_rand_t stat while (t1 < TIME_THRES) { for (slong i = 0; i < NB_ITER; i++) // warmup +#ifdef HAVE_AVX2 res[i] += _nmod_vec_dot_product_split26_avx(v1[i], v2[i], len, mod); +#else + res[i] += _nmod_vec_dot_product_split26(v1[i], v2[i], len, mod); +#endif tt = clock(); for (slong i = 0; i < NB_ITER; i++) +#ifdef HAVE_AVX2 res[i] += _nmod_vec_dot_product_split26_avx(v1[i], v2[i], len, mod); +#else + res[i] += 
_nmod_vec_dot_product_split26(v1[i], v2[i], len, mod); +#endif t1 += (double)(clock()-tt) / CLOCKS_PER_SEC; nb_iter += NB_ITER; } @@ -181,7 +193,11 @@ ulong time_nmod_vec_dot_product_split26_cf(ulong len, ulong n, flint_rand_t stat { // TEST const dot_params_t params = _nmod_vec_dot_params(len, mod); +#ifdef HAVE_AVX2 ulong res_split = _nmod_vec_dot_product_split26_avx(v1, v2, len, mod); +#else + ulong res_split = _nmod_vec_dot_product_split26(v1, v2, len, mod); +#endif ulong res_correct = _nmod_vec_dot(v1, v2, len, mod, params); if (res_split != res_correct) { @@ -198,11 +214,19 @@ ulong time_nmod_vec_dot_product_split26_cf(ulong len, ulong n, flint_rand_t stat while (t1 < TIME_THRES) { for (slong i = 0; i < NB_ITER; i++) // warmup +#ifdef HAVE_AVX2 res += _nmod_vec_dot_product_split26_avx(v1, v2, len, mod); +#else + res += _nmod_vec_dot_product_split26(v1, v2, len, mod); +#endif tt = clock(); for (slong i = 0; i < NB_ITER; i++) +#ifdef HAVE_AVX2 res += _nmod_vec_dot_product_split26_avx(v1, v2, len, mod); +#else + res += _nmod_vec_dot_product_split26(v1, v2, len, mod); +#endif t1 += (double)(clock()-tt) / CLOCKS_PER_SEC; nb_iter += NB_ITER; } @@ -565,7 +589,14 @@ int main(int argc, char ** argv) const slong nbits = 19; const slong bits[] = {17, 20, 23, 26, 29, 30, 31, 32, 33, 40, 50, 55, 57, 59, 60, 61, 62, 63, 64}; +#ifdef HAVE_AVX512 + const slong nfuns = 8; +#elif HAVE_AVX_IFMA const slong nfuns = 6; +#else + const slong nfuns = 4; +#endif + typedef ulong (*timefun) (ulong, ulong, flint_rand_t); const timefun funs[] = { time_nmod_vec_dot_product_flint_cf, // 0 From 2cf0730b0d72c3b7ca3bd9b37f75e783dbd2044e Mon Sep 17 00:00:00 2001 From: Vincent Neiger Date: Sat, 11 Apr 2026 10:21:31 +0200 Subject: [PATCH 8/8] support non avx2 arch --- .../src/nmod32_vec/profile/p-dot_mdot.c | 71 ++++++++- .../profile/time_nmod_vec_dot_product.c | 135 ++---------------- 2 files changed, 79 insertions(+), 127 deletions(-) diff --git 
a/flint-extras/src/nmod32_vec/profile/p-dot_mdot.c b/flint-extras/src/nmod32_vec/profile/p-dot_mdot.c index 5fc4f6dd..e3e378c4 100644 --- a/flint-extras/src/nmod32_vec/profile/p-dot_mdot.c +++ b/flint-extras/src/nmod32_vec/profile/p-dot_mdot.c @@ -360,6 +360,7 @@ int main(int argc, char ** argv) const slong nlens = 10; const ulong lens[] = {50, 100, 250, 500, 1000, 2500, 5000, 10000, 100000, 1000000}; +#if PML_HAVE_AVX512 // bench functions const slong nfuns = 16; typedef void (*timefun) (time_args, flint_rand_t); @@ -421,6 +422,74 @@ int main(int argc, char ** argv) "#15 --> mdot3_split_avx512 ", }; +#elif PML_HAVE_AVX2 + // bench functions + const slong nfuns = 10; /* must equal the number of entries in funs[], sfuns[] and description[] below (#0..#9) */ + typedef void (*timefun) (time_args, flint_rand_t); + const timefun funs[] = { + time_dot_split, // 0 + time_dot_split_avx2, // 1 + time_dot_msolve_avx2, // 2 + time_mdot_split, // 3 + time_mdot_split_avx2, // 4 + time_mdot_msolve_via_dot_avx2, // 5 + time_mdot_msolve_native_avx2, // 6 + time_mdot2_split, // 7 + time_mdot2_split_avx2, // 8 + time_mdot3_split_avx2, // 9 + }; + + typedef void (*samplefun) (void*, ulong); + const samplefun sfuns[] = { + sample_dot_split, // 0 + sample_dot_split_avx2, // 1 + sample_dot_msolve_avx2, // 2 + sample_mdot_split, // 3 + sample_mdot_split_avx2, // 4 + sample_mdot_msolve_via_dot_avx2, // 5 + sample_mdot_msolve_native_avx2, // 6 + sample_mdot2_split, // 7 + sample_mdot2_split_avx2, // 8 + sample_mdot3_split_avx2, // 9 + }; + + const char * description[] = { + "#0 --> dot_split ", + "#1 --> dot_split_avx2 ", + "#2 --> dot_msolve_avx2 ", + "#3 --> mdot_split ", + "#4 --> mdot_split_avx2 ", + "#5 --> mdot_msolve_via_dot_avx2 ", + "#6 --> mdot_msolve_native_avx2 ", + "#7 --> mdot2_split ", + "#8 --> mdot2_split_avx2 ", + "#9 --> mdot3_split_avx2 ", + }; + +#else /* i.e. 
neither PML_HAVE_AVX512 nor PML_HAVE_AVX2 */ + // bench functions + const slong nfuns = 3; + typedef void (*timefun) (time_args, flint_rand_t); + const timefun funs[] = { + time_dot_split, // 0 + time_mdot_split, // 1 + time_mdot2_split, // 2 + }; + + typedef void (*samplefun) (void*, ulong); + const samplefun sfuns[] = { + sample_dot_split, // 0 + sample_mdot_split, // 1 + sample_mdot2_split, // 2 + }; + + const char * description[] = { + "#0 --> dot_split ", + "#1 --> mdot_split ", + "#2 --> mdot2_split ", + }; +#endif + if (argc == 1) // show usage { printf("Usage: `%s [nbits] [len] [fun]`\n", argv[0]); @@ -453,7 +522,7 @@ int main(int argc, char ** argv) for (slong i = 0; i < 3; i++) { time_args targs = {1, 10000, UWORD(1) << 20}; - time_dot_split_avx2(targs, state); + time_dot_split(targs, state); printf(" "); } printf("\n\n"); diff --git a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c index dc105667..9ed159d7 100644 --- a/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c +++ b/flint-extras/src/nmod_vec_extra/profile/time_nmod_vec_dot_product.c @@ -127,7 +127,7 @@ ulong time_nmod_vec_dot_product_split26_cu(ulong len, ulong n, flint_rand_t stat { // TEST const dot_params_t params = _nmod_vec_dot_params(len, mod); -#ifdef HAVE_AVX2 +#ifdef PML_HAVE_AVX2 ulong res_split = _nmod_vec_dot_product_split26_avx(v1[0], v2[0], len, mod); #else ulong res_split = _nmod_vec_dot_product_split26(v1[0], v2[0], len, mod); @@ -148,7 +148,7 @@ ulong time_nmod_vec_dot_product_split26_cu(ulong len, ulong n, flint_rand_t stat while (t1 < TIME_THRES) { for (slong i = 0; i < NB_ITER; i++) // warmup -#ifdef HAVE_AVX2 +#ifdef PML_HAVE_AVX2 res[i] += _nmod_vec_dot_product_split26_avx(v1[i], v2[i], len, mod); #else res[i] += _nmod_vec_dot_product_split26(v1[i], v2[i], len, mod); @@ -156,7 +156,7 @@ ulong time_nmod_vec_dot_product_split26_cu(ulong len, ulong n, flint_rand_t stat tt 
= clock(); for (slong i = 0; i < NB_ITER; i++) -#ifdef HAVE_AVX2 +#ifdef PML_HAVE_AVX2 res[i] += _nmod_vec_dot_product_split26_avx(v1[i], v2[i], len, mod); #else res[i] += _nmod_vec_dot_product_split26(v1[i], v2[i], len, mod); @@ -193,7 +193,7 @@ ulong time_nmod_vec_dot_product_split26_cf(ulong len, ulong n, flint_rand_t stat { // TEST const dot_params_t params = _nmod_vec_dot_params(len, mod); -#ifdef HAVE_AVX2 +#ifdef PML_HAVE_AVX2 ulong res_split = _nmod_vec_dot_product_split26_avx(v1, v2, len, mod); #else ulong res_split = _nmod_vec_dot_product_split26(v1, v2, len, mod); @@ -214,7 +214,7 @@ ulong time_nmod_vec_dot_product_split26_cf(ulong len, ulong n, flint_rand_t stat while (t1 < TIME_THRES) { for (slong i = 0; i < NB_ITER; i++) // warmup -#ifdef HAVE_AVX2 +#ifdef PML_HAVE_AVX2 res += _nmod_vec_dot_product_split26_avx(v1, v2, len, mod); #else res += _nmod_vec_dot_product_split26(v1, v2, len, mod); @@ -222,7 +222,7 @@ ulong time_nmod_vec_dot_product_split26_cf(ulong len, ulong n, flint_rand_t stat tt = clock(); for (slong i = 0; i < NB_ITER; i++) -#ifdef HAVE_AVX2 +#ifdef PML_HAVE_AVX2 res += _nmod_vec_dot_product_split26_avx(v1, v2, len, mod); #else res += _nmod_vec_dot_product_split26(v1, v2, len, mod); @@ -239,7 +239,7 @@ ulong time_nmod_vec_dot_product_split26_cf(ulong len, ulong n, flint_rand_t stat return res; } -#ifdef HAVE_AVX512 +#ifdef PML_HAVE_AVX512 ulong time_nmod_vec_dot_product_ifma256_cu(ulong len, ulong n, flint_rand_t state) { nmod_t mod; @@ -459,118 +459,6 @@ ulong time_nmod_vec_dot_product_ifma512_cf(ulong len, ulong n, flint_rand_t stat } #endif -#ifdef HAVE_AVX_IFMA -ulong time_nmod_vec_dot_product_avx_ifma_cu(ulong len, ulong n, flint_rand_t state) -{ - nmod_t mod; - nmod_init(&mod, n); - - nn_ptr v1[NB_ITER]; - for (slong i = 0; i < NB_ITER; i++) - { - v1[i] = _nmod_vec_init(len); - _nmod_vec_rand(v1[i], state, len, mod); - } - nn_ptr v2[NB_ITER]; - for (slong i = 0; i < NB_ITER; i++) - { - v2[i] = _nmod_vec_init(len); - 
_nmod_vec_rand(v2[i], state, len, mod); - } - ulong res[NB_ITER]; - - { // TEST - const dot_params_t params = _nmod_vec_dot_params(len, mod); - ulong res_split = _nmod_vec_dot_product_avx_ifma(v1[0], v2[0], len, mod); - ulong res_correct = _nmod_vec_dot(v1[0], v2[0], len, mod, params); - if (res_split != res_correct) - { - printf("\nDOT PRODUCT ERROR!\n"); - return 0; - } - } - - double t1; - clock_t tt; - long nb_iter; - - t1 = 0.0; nb_iter = 0; - while (t1 < TIME_THRES) - { - for (slong i = 0; i < NB_ITER; i++) // warmup - res[i] += _nmod_vec_dot_product_avx_ifma(v1[i], v2[i], len, mod); - - tt = clock(); - for (slong i = 0; i < NB_ITER; i++) - res[i] += _nmod_vec_dot_product_avx_ifma(v1[i], v2[i], len, mod); - t1 += (double)(clock()-tt) / CLOCKS_PER_SEC; - nb_iter += NB_ITER; - } - t1 /= nb_iter; - printf("%.1e\t", t1); - - for (slong i = 0; i < NB_ITER; i++) - { - _nmod_vec_clear(v1[i]); - _nmod_vec_clear(v2[i]); - } - - return 0; -} - -ulong time_nmod_vec_dot_product_avx_ifma_cf(ulong len, ulong n, flint_rand_t state) -{ - nmod_t mod; - nmod_init(&mod, n); - - nn_ptr v1; - v1 = _nmod_vec_init(len); - _nmod_vec_rand(v1, state, len, mod); - - nn_ptr v2; - v2 = _nmod_vec_init(len); - _nmod_vec_rand(v2, state, len, mod); - - ulong res = 0; - - { // TEST - const dot_params_t params = _nmod_vec_dot_params(len, mod); - ulong res_split = _nmod_vec_dot_product_avx_ifma(v1, v2, len, mod); - ulong res_correct = _nmod_vec_dot(v1, v2, len, mod, params); - if (res_split != res_correct) - { - printf("\nDOT PRODUCT ERROR!\n"); - return 0; - } - } - - double t1; - clock_t tt; - long nb_iter; - - t1 = 0.0; nb_iter = 0; - while (t1 < TIME_THRES) - { - for (slong i = 0; i < NB_ITER; i++) // warmup - res += _nmod_vec_dot_product_avx_ifma(v1, v2, len, mod); - - tt = clock(); - for (slong i = 0; i < NB_ITER; i++) - res += _nmod_vec_dot_product_avx_ifma(v1, v2, len, mod); - t1 += (double)(clock()-tt) / CLOCKS_PER_SEC; - nb_iter += NB_ITER; - } - t1 /= nb_iter; - printf("%.1e\t", t1); 
- - _nmod_vec_clear(v1); - _nmod_vec_clear(v2); - - return res; -} -#endif - - /*--------------------------------------------------------------*/ /* main calls time */ /*--------------------------------------------------------------*/ @@ -589,10 +477,8 @@ int main(int argc, char ** argv) const slong nbits = 19; const slong bits[] = {17, 20, 23, 26, 29, 30, 31, 32, 33, 40, 50, 55, 57, 59, 60, 61, 62, 63, 64}; -#ifdef HAVE_AVX512 +#ifdef PML_HAVE_AVX512 const slong nfuns = 8; -#elif HAVE_AVX_IFMA - const slong nfuns = 6; #else const slong nfuns = 4; #endif @@ -603,14 +489,11 @@ int main(int argc, char ** argv) time_nmod_vec_dot_product_flint_cu, // 1 time_nmod_vec_dot_product_split26_cf, // 2 time_nmod_vec_dot_product_split26_cu, // 3 -#if HAVE_AVX512 +#ifdef PML_HAVE_AVX512 /* same #ifdef test as the nfuns count, so table length and nfuns always agree */ time_nmod_vec_dot_product_ifma256_cf, // 4 time_nmod_vec_dot_product_ifma256_cu, // 5 time_nmod_vec_dot_product_ifma512_cf, // 6 time_nmod_vec_dot_product_ifma512_cu, // 7 -#elif HAVE_AVX_IFMA - time_nmod_vec_dot_product_avx_ifma_cf, // 4 - time_nmod_vec_dot_product_avx_ifma_cu, // 5 #endif };