From 905f86e529c5f99e75e4f3d284ae475508723d42 Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 2 Apr 2022 18:53:49 -0400 Subject: [PATCH 01/15] streamline/speed up addc/subc by using in/out carry parameter --- include/intx/intx.hpp | 136 ++++++++++++++++--------------- test/benchmarks/benchmarks.cpp | 4 +- test/experimental/addmod.hpp | 75 +++++++++-------- test/unittests/test_builtins.cpp | 27 ++++-- 4 files changed, 133 insertions(+), 109 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index aebf1f93..4412b109 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -163,83 +163,79 @@ struct result_with_carry /// Linear arithmetic operators. /// @{ -/// Addition with carry. -inline constexpr result_with_carry addc( - uint64_t x, uint64_t y, bool carry = false) noexcept +/// Addition with carry. `uint64_t *carry` is used as in/out parameter +inline constexpr uint64_t addc( + uint64_t x, uint64_t y, uint64_t *carry) noexcept { #if __has_builtin(__builtin_addcll) if (!is_constant_evaluated()) { - unsigned long long carryout = 0; // NOLINT(google-runtime-int) - const auto s = __builtin_addcll(x, y, carry, &carryout); static_assert(sizeof(s) == sizeof(uint64_t)); - return {s, static_cast(carryout)}; + return __builtin_addcll(x, y, *carry); } #elif __has_builtin(__builtin_ia32_addcarryx_u64) if (!is_constant_evaluated()) { unsigned long long s = 0; // NOLINT(google-runtime-int) static_assert(sizeof(s) == sizeof(uint64_t)); - const auto carryout = __builtin_ia32_addcarryx_u64(carry, x, y, &s); - return {s, static_cast(carryout)}; + *carry = __builtin_ia32_addcarryx_u64(*carry, x, y, &s); + return s; } #endif const auto s = x + y; const auto carry1 = s < x; - const auto t = s + carry; + const auto t = s + *carry; const auto carry2 = t < s; - return {t, carry1 || carry2}; + *carry = !!(carry1 || carry2); + return t; } -/// Subtraction with carry (borrow). -inline constexpr result_with_carry subc( - uint64_t x, uint64_t y, bool carry = false) noexcept +/// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter +inline constexpr uint64_t subc( + uint64_t x, uint64_t y, uint64_t *carry) noexcept { #if __has_builtin(__builtin_subcll) if (!is_constant_evaluated()) { - unsigned long long carryout = 0; // NOLINT(google-runtime-int) - const auto d = __builtin_subcll(x, y, carry, &carryout); static_assert(sizeof(d) == sizeof(uint64_t)); - return {d, static_cast(carryout)}; + return __builtin_subcll(x, y, *carry, carry); } #elif __has_builtin(__builtin_ia32_sbb_u64) if (!is_constant_evaluated()) { unsigned long long d = 0; // NOLINT(google-runtime-int) static_assert(sizeof(d) == sizeof(uint64_t)); - const auto carryout = __builtin_ia32_sbb_u64(carry, x, y, &d); - return {d, static_cast(carryout)}; + *carry = __builtin_ia32_sbb_u64(*carry, x, y, &d); + return d; } #endif const auto d = x - y; const auto carry1 = x < y; - const auto e = d - carry; - const auto carry2 = d < uint64_t{carry}; - return {e, carry1 || carry2}; + const auto e = d - *carry; + const auto carry2 = d < *carry; + *carry = !!(carry1 || carry2); + return e; } /// Addition with carry. template -inline constexpr result_with_carry> addc( - const uint& x, const uint& y, bool carry = false) noexcept +inline constexpr uint addc( + const uint& x, const uint& y, uint64_t *carry) noexcept { uint s; - bool k = carry; for (size_t i = 0; i < uint::num_words; ++i) { - const auto t = addc(x[i], y[i], k); - s[i] = t.value; - k = t.carry; + s[i] = addc(x[i], y[i], carry); } - return {s, k}; + return s; } inline constexpr uint128 operator+(uint128 x, uint128 y) noexcept { - return addc(x, y).value; + uint64_t carry = 0; + return addc(x, y, &carry); } inline constexpr uint128 operator+(uint128 x) noexcept @@ -250,23 +246,21 @@ inline constexpr uint128 operator+(uint128 x) noexcept /// Performs subtraction of two unsigned numbers and returns the difference /// and the carry bit (aka borrow, overflow). template -inline constexpr result_with_carry> subc( - const uint& x, const uint& y, bool carry = false) noexcept +inline constexpr uint subc( + const uint& x, const uint& y, uint64_t *carry) noexcept { uint z; - bool k = carry; for (size_t i = 0; i < uint::num_words; ++i) { - const auto t = subc(x[i], y[i], k); - z[i] = t.value; - k = t.carry; + z[i] = subc(x[i], y[i], carry); } - return {z, k}; + return z; } inline constexpr uint128 operator-(uint128 x, uint128 y) noexcept { - return subc(x, y).value; + uint64_t carry = 0; + return subc(x, y, &carry); } inline constexpr uint128 operator-(uint128 x) noexcept @@ -1137,7 +1131,9 @@ inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept template inline constexpr bool operator<(const uint& x, const uint& y) noexcept { - return subc(x, y).carry; + uint64_t carry = 0; + subc(x, y, &carry); + return !!carry; } template inline constexpr uint operator+(const uint& x, const uint& y) noexcept { - return addc(x, y).value; + uint64_t carry = 0; + return addc(x, y, &carry); } template @@ -1473,7 +1470,8 @@ inline constexpr uint operator-(const uint& x) noexcept template inline constexpr uint operator-(const uint& x, const uint& y) noexcept { - return subc(x, y).value; + uint64_t carry = 0; + return subc(x, y, &carry); } template umul(const uint& x, const uint& y) noexcept uint64_t k = 0; for (size_t i = 0; i < num_words; ++i) { - const auto a = addc(p[i + j], k); - const auto t = umul(x[i], y[j]) + uint128{a.value, a.carry}; + uint64_t carry = 0; + const auto a = addc(p[i + j], k, &carry); + const auto t = umul(x[i], y[j]) + uint128{a, carry}; p[i + j] = t[0]; k = t[1]; } @@ -1524,8 +1523,9 @@ inline constexpr uint operator*(const uint& x, const uint& y) noexcept uint64_t k = 0; for (size_t i = 0; i < (num_words - j - 1); i++) { - const auto a = addc(p[i + j], k); - const auto t = umul(x[i], y[j]) + uint128{a.value, a.carry}; + uint64_t carry = 0; + const auto a = addc(p[i + j], k, &carry); + const auto t = umul(x[i], y[j]) + uint128{a, carry}; p[i + j] = t[0]; k = t[1]; } @@ -1715,10 +1715,10 @@ inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) n // OPT: Add MinLen template parameter and unroll first loop iterations. INTX_REQUIRE(len >= 2); - bool carry = false; + uint64_t carry = 0; for (int i = 0; i < len; ++i) - std::tie(s[i], carry) = addc(x[i], y[i], carry); - return carry; + s[i] = addc(x[i], y[i], &carry); + return !!carry; } /// r = x - multiplier * y. @@ -1766,12 +1766,13 @@ inline void udivrem_knuth( uint128 rhat; std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal); - bool carry{}; const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat); - std::tie(u[j + dlen - 2], carry) = subc(rhat[0], overflow); - std::tie(u[j + dlen - 1], carry) = subc(rhat[1], carry); + uint64_t carry1 = 0; + u[j + dlen - 2] = subc(rhat[0], overflow, &carry1); + uint64_t carry2 = 0; + u[j + dlen - 1] = subc(rhat[1], carry1, &carry2); - if (INTX_UNLIKELY(carry)) + if (INTX_UNLIKELY(!!carry2)) { --qhat; u[j + dlen - 1] += divisor[1] + add(&u[j], &u[j], d, dlen - 1); @@ -2035,28 +2036,31 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no { // Normalize x in case it is bigger than mod. auto xn = x; - const auto xd = subc(x, mod); - if (!xd.carry) - xn = xd.value; + uint64_t carry = 0; + const auto xd = subc(x, mod, &carry); + if (!carry) + xn = xd; // Normalize y in case it is bigger than mod. auto yn = y; - const auto yd = subc(y, mod); - if (!yd.carry) - yn = yd.value; - - const auto a = addc(xn, yn); - const auto av = a.value; - const auto b = subc(av, mod); - const auto bv = b.value; - if (a.carry || !b.carry) + carry = 0; + const auto yd = subc(y, mod, &carry); + if (!carry) + yn = yd; + + carry = 0; + uint64_t carry2 = 0; + const auto av = addc(xn, yn, &carry2); + const auto bv = subc(av, mod, &carry); + if (carry2 || !carry) return bv; return av; } - const auto s = addc(x, y); - uint<256 + 64> n = s.value; - n[4] = s.carry; + uint64_t carry = 0; + const auto s = addc(x, y, &carry); + uint<256 + 64> n = s; + n[4] = carry; return udivrem(n, mod).rem; } diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index ff37d683..928e00de 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -355,7 +355,9 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3); [[gnu::noinline]] static bool lt_sub(const uint256& x, const uint256& y) noexcept { - return subc(x, y).carry; + uint64_t carry = 0; + subc(x, y, &carry); + return !!carry; } [[gnu::noinline]] static bool lt_wordcmp(const uint256& x, const uint256& y) noexcept diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp index 78c0e042..80e250d1 100644 --- a/test/experimental/addmod.hpp +++ b/test/experimental/addmod.hpp @@ -16,9 +16,9 @@ namespace intx::test [[maybe_unused, gnu::noinline]] static uint256 addmod_simple( const uint256& x, const uint256& y, const uint256& mod) noexcept { - const auto s = addc(x, y); - uint<256 + 64> n = s.value; - n[4] = s.carry; + uint64_t carry = 0; + uint<256 + 64> n = addc(x, y, &carry); + n[4] = carry; return udivrem(n, mod).rem; } @@ -28,9 +28,9 @@ namespace intx::test const auto xm = x >= mod ? x % mod : x; const auto ym = y >= mod ? y % mod : y; - const auto s = addc(xm, ym); - auto sum = s.value; - if (s.carry || s.value >= mod) + uint64_t carry = 0; + auto sum = addc(xm, ym, &carry); + if (carry || sum >= mod) sum -= mod; return sum; } @@ -43,22 +43,28 @@ namespace intx::test // Based on https://github.com/holiman/uint256/pull/86. if ((m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3])) { - auto s = subc(x, m); - if (s.carry) - s.value = x; + uint64_t carry = 0; + auto s = subc(x, m, &carry); + if (carry) + s = x; - auto t = subc(y, m); - if (t.carry) - t.value = y; + carry = 0; + auto t = subc(y, m, &carry); + if (carry) + t = y; - s = addc(s.value, t.value); - t = subc(s.value, m); - return (s.carry || !t.carry) ? t.value : s.value; + carry = 0; + s = addc(s, t, &carry); + + uint64_t carry2 = 0; + t = subc(s, m, &carry2); + + return (carry || !carry2) ? t : s; } - const auto s = addc(x, y); - uint<256 + 64> n = s.value; - n[4] = s.carry; + uint64_t carry = 0; + uint<256 + 64> n = addc(x, y, &carry); + n[4] = carry; return udivrem(n, m).rem; } @@ -72,28 +78,31 @@ namespace intx::test { // Normalize x in case it is bigger than mod. auto xn = x; - const auto xd = subc(x, mod); - if (!xd.carry) - xn = xd.value; + uint64_t carry = 0; + const auto xd = subc(x, mod, &carry); + if (!carry) + xn = xd; // Normalize y in case it is bigger than mod. auto yn = y; - const auto yd = subc(y, mod); - if (!yd.carry) - yn = yd.value; - - const auto a = addc(xn, yn); - const auto av = a.value; - const auto b = subc(av, mod); - const auto bv = b.value; - if (a.carry || !b.carry) + carry = 0; + const auto yd = subc(y, mod, &carry); + if (!carry) + yn = yd; + + carry = 0; + const auto av = addc(xn, yn, &carry); + + uint64_t carry2 = 0; + const auto bv = subc(av, mod, &carry2); + if (carry || !carry2) return bv; return av; } - const auto s = addc(x, y); - uint<256 + 64> n = s.value; - n[4] = s.carry; + uint64_t carry = 0; + uint<256 + 64> n = addc(x, y, &carry); + n[4] = carry; return udivrem(n, mod).rem; } } // namespace intx::test diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp index a11f2a5d..545f88a7 100644 --- a/test/unittests/test_builtins.cpp +++ b/test/unittests/test_builtins.cpp @@ -31,15 +31,24 @@ static_assert(to_big_endian(uint32_t{0x0d0c0b0a}) == (is_le ? 0x0a0b0c0d : 0x0d0 static_assert(to_big_endian(uint64_t{0x02010f0e0d0c0b0a}) == (is_le ? 0x0a0b0c0d0e0f0102 : 0x02010f0e0d0c0b0a)); -static_assert(addc(0, 0).value == 0); -static_assert(!addc(0, 0).carry); -static_assert(addc(0xffffffffffffffff, 2).value == 1); -static_assert(addc(0xffffffffffffffff, 2).carry); - -static_assert(subc(0, 0).value == 0); -static_assert(!subc(0, 0).carry); -static_assert(subc(0, 1).value == 0xffffffffffffffff); -static_assert(subc(0, 1).carry); + +TEST(builtins, addc) +{ + uint64_t carry = 0; + EXPECT_EQ(addc(0, 0, &carry), 0); + EXPECT_EQ(carry, 0); + EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1); + EXPECT_NE(carry, 0); +} + +TEST(builtins, subc) +{ + uint64_t carry = 0; + EXPECT_EQ(subc(0, 0, &carry), 0); + EXPECT_TRUE(!carry); + EXPECT_EQ(subc(0, 1, &carry), 0xffffffffffffffff); + EXPECT_TRUE(carry); +} TEST(builtins, clz64_single_one) From ebe54df36e95bf568215e9ce24c4ecfc03e01460 Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 2 Apr 2022 18:59:39 -0400 Subject: [PATCH 02/15] add cast for clang build. --- include/intx/intx.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 4412b109..4fb26be1 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -178,7 +178,7 @@ inline constexpr uint64_t addc( { unsigned long long s = 0; // NOLINT(google-runtime-int) static_assert(sizeof(s) == sizeof(uint64_t)); - *carry = __builtin_ia32_addcarryx_u64(*carry, x, y, &s); + *carry = __builtin_ia32_addcarryx_u64((unsigned char)*carry, x, y, &s); return s; } #endif @@ -206,7 +206,7 @@ inline constexpr uint64_t subc( { unsigned long long d = 0; // NOLINT(google-runtime-int) static_assert(sizeof(d) == sizeof(uint64_t)); - *carry = __builtin_ia32_sbb_u64(*carry, x, y, &d); + *carry = __builtin_ia32_sbb_u64((unsigned char)*carry, x, y, &d); return d; } #endif From 9074a9e74a41dbdeda76a84146be74b7e868e81a Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 2 Apr 2022 19:03:26 -0400 Subject: [PATCH 03/15] implement some code formatting suggestions --- include/intx/intx.hpp | 12 ++++-------- test/unittests/test_builtins.cpp | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 4fb26be1..fcf86df3 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -164,8 +164,7 @@ struct result_with_carry /// @{ /// Addition with carry. `uint64_t *carry` is used as in/out parameter -inline constexpr uint64_t addc( - uint64_t x, uint64_t y, uint64_t *carry) noexcept +inline constexpr uint64_t addc(uint64_t x, uint64_t y, uint64_t *carry) noexcept { #if __has_builtin(__builtin_addcll) if (!is_constant_evaluated()) @@ -192,8 +191,7 @@ inline constexpr uint64_t addc( } /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter -inline constexpr uint64_t subc( - uint64_t x, uint64_t y, uint64_t *carry) noexcept +inline constexpr uint64_t subc(uint64_t x, uint64_t y, uint64_t *carry) noexcept { #if __has_builtin(__builtin_subcll) if (!is_constant_evaluated()) @@ -221,8 +219,7 @@ inline constexpr uint64_t subc( /// Addition with carry. template -inline constexpr uint addc( - const uint& x, const uint& y, uint64_t *carry) noexcept +inline constexpr uint addc(const uint& x, const uint& y, uint64_t *carry) noexcept { uint s; for (size_t i = 0; i < uint::num_words; ++i) @@ -246,8 +243,7 @@ inline constexpr uint128 operator+(uint128 x) noexcept /// Performs subtraction of two unsigned numbers and returns the difference /// and the carry bit (aka borrow, overflow). template -inline constexpr uint subc( - const uint& x, const uint& y, uint64_t *carry) noexcept +inline constexpr uint subc(const uint& x, const uint& y, uint64_t *carry) noexcept { uint z; for (size_t i = 0; i < uint::num_words; ++i) diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp index 545f88a7..51747d8a 100644 --- a/test/unittests/test_builtins.cpp +++ b/test/unittests/test_builtins.cpp @@ -38,7 +38,7 @@ TEST(builtins, addc) EXPECT_EQ(addc(0, 0, &carry), 0); EXPECT_EQ(carry, 0); EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1); - EXPECT_NE(carry, 0); + EXPECT_NE(carry, 0); } TEST(builtins, subc) From ba889d0b284017e3f931059b9f5358eec8cf454a Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 2 Apr 2022 19:12:15 -0400 Subject: [PATCH 04/15] Fix a couple typos --- include/intx/intx.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index fcf86df3..e9ae0047 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -169,8 +169,7 @@ inline constexpr uint64_t addc(uint64_t x, uint64_t y, uint64_t *carry) noexcept #if __has_builtin(__builtin_addcll) if (!is_constant_evaluated()) { - static_assert(sizeof(s) == sizeof(uint64_t)); - return __builtin_addcll(x, y, *carry); + return __builtin_addcll(x, y, *carry, carry); } #elif __has_builtin(__builtin_ia32_addcarryx_u64) if (!is_constant_evaluated()) @@ -196,7 +195,6 @@ inline constexpr uint64_t subc(uint64_t x, uint64_t y, uint64_t *carry) noexcept #if __has_builtin(__builtin_subcll) if (!is_constant_evaluated()) { - static_assert(sizeof(d) == sizeof(uint64_t)); return __builtin_subcll(x, y, *carry, carry); } #elif __has_builtin(__builtin_ia32_sbb_u64) From 183afb2aa67b2c9f9ffcfa4e27b53bef9cdfc306 Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 2 Apr 2022 19:23:23 -0400 Subject: [PATCH 05/15] fix clang build issues --- include/intx/intx.hpp | 38 ++++++++++++++++---------------- test/benchmarks/benchmarks.cpp | 2 +- test/experimental/addmod.hpp | 16 +++++++------- test/unittests/test_builtins.cpp | 4 ++-- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index e9ae0047..e3aabe12 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -164,7 +164,7 @@ struct result_with_carry /// @{ /// Addition with carry. `uint64_t *carry` is used as in/out parameter -inline constexpr uint64_t addc(uint64_t x, uint64_t y, uint64_t *carry) noexcept +inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long *carry) noexcept { #if __has_builtin(__builtin_addcll) if (!is_constant_evaluated()) @@ -190,7 +190,7 @@ inline constexpr uint64_t addc(uint64_t x, uint64_t y, uint64_t *carry) noexcept } /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter -inline constexpr uint64_t subc(uint64_t x, uint64_t y, uint64_t *carry) noexcept +inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long *carry) noexcept { #if __has_builtin(__builtin_subcll) if (!is_constant_evaluated()) @@ -217,7 +217,7 @@ inline constexpr uint64_t subc(uint64_t x, uint64_t y, uint64_t *carry) noexcept /// Addition with carry. template -inline constexpr uint addc(const uint& x, const uint& y, uint64_t *carry) noexcept +inline constexpr uint addc(const uint& x, const uint& y, unsigned long long *carry) noexcept { uint s; for (size_t i = 0; i < uint::num_words; ++i) @@ -229,7 +229,7 @@ inline constexpr uint addc(const uint& x, const uint& y, uint64_t *carr inline constexpr uint128 operator+(uint128 x, uint128 y) noexcept { - uint64_t carry = 0; + unsigned long long carry = 0; return addc(x, y, &carry); } @@ -241,7 +241,7 @@ inline constexpr uint128 operator+(uint128 x) noexcept /// Performs subtraction of two unsigned numbers and returns the difference /// and the carry bit (aka borrow, overflow). template -inline constexpr uint subc(const uint& x, const uint& y, uint64_t *carry) noexcept +inline constexpr uint subc(const uint& x, const uint& y, unsigned long long *carry) noexcept { uint z; for (size_t i = 0; i < uint::num_words; ++i) @@ -253,7 +253,7 @@ inline constexpr uint subc(const uint& x, const uint& y, uint64_t *carr inline constexpr uint128 operator-(uint128 x, uint128 y) noexcept { - uint64_t carry = 0; + unsigned long long carry = 0; return subc(x, y, &carry); } @@ -1125,7 +1125,7 @@ inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept template inline constexpr bool operator<(const uint& x, const uint& y) noexcept { - uint64_t carry = 0; + unsigned long long carry = 0; subc(x, y, &carry); return !!carry; } @@ -1299,7 +1299,7 @@ inline constexpr uint operator<<(const uint& x, uint64_t shift) noexcept const auto skip = static_cast(shift / word_bits); uint r; - uint64_t carry = 0; + unsigned long long carry = 0; for (size_t i = 0; i < (uint::num_words - skip); ++i) { r[i + skip] = (x[i] << s) | carry; @@ -1352,7 +1352,7 @@ inline constexpr uint operator>>(const uint& x, uint64_t shift) noexcept const auto skip = static_cast(shift / word_bits); uint r; - uint64_t carry = 0; + unsigned long long carry = 0; for (size_t i = 0; i < (num_words - skip); ++i) { r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> s) | carry; @@ -1451,7 +1451,7 @@ inline const uint8_t* as_bytes(const T& x) noexcept template inline constexpr uint operator+(const uint& x, const uint& y) noexcept { - uint64_t carry = 0; + unsigned long long carry = 0; return addc(x, y, &carry); } @@ -1464,7 +1464,7 @@ inline constexpr uint operator-(const uint& x) noexcept template inline constexpr uint operator-(const uint& x, const uint& y) noexcept { - uint64_t carry = 0; + unsigned long long carry = 0; return subc(x, y, &carry); } @@ -1493,7 +1493,7 @@ inline constexpr uint<2 * N> umul(const uint& x, const uint& y) noexcept uint64_t k = 0; for (size_t i = 0; i < num_words; ++i) { - uint64_t carry = 0; + unsigned long long carry = 0; const auto a = addc(p[i + j], k, &carry); const auto t = umul(x[i], y[j]) + uint128{a, carry}; p[i + j] = t[0]; @@ -1517,7 +1517,7 @@ inline constexpr uint operator*(const uint& x, const uint& y) noexcept uint64_t k = 0; for (size_t i = 0; i < (num_words - j - 1); i++) { - uint64_t carry = 0; + unsigned long long carry = 0; const auto a = addc(p[i + j], k, &carry); const auto t = umul(x[i], y[j]) + uint128{a, carry}; p[i + j] = t[0]; @@ -1709,7 +1709,7 @@ inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) n // OPT: Add MinLen template parameter and unroll first loop iterations. INTX_REQUIRE(len >= 2); - uint64_t carry = 0; + unsigned long long carry = 0; for (int i = 0; i < len; ++i) s[i] = addc(x[i], y[i], &carry); return !!carry; @@ -1761,9 +1761,9 @@ inline void udivrem_knuth( std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal); const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat); - uint64_t carry1 = 0; + unsigned long long carry1 = 0; u[j + dlen - 2] = subc(rhat[0], overflow, &carry1); - uint64_t carry2 = 0; + unsigned long long carry2 = 0; u[j + dlen - 1] = subc(rhat[1], carry1, &carry2); if (INTX_UNLIKELY(!!carry2)) @@ -2030,7 +2030,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no { // Normalize x in case it is bigger than mod. auto xn = x; - uint64_t carry = 0; + unsigned long long carry = 0; const auto xd = subc(x, mod, &carry); if (!carry) xn = xd; @@ -2043,7 +2043,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no yn = yd; carry = 0; - uint64_t carry2 = 0; + unsigned long long carry2 = 0; const auto av = addc(xn, yn, &carry2); const auto bv = subc(av, mod, &carry); if (carry2 || !carry) @@ -2051,7 +2051,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no return av; } - uint64_t carry = 0; + unsigned long long carry = 0; const auto s = addc(x, y, &carry); uint<256 + 64> n = s; n[4] = carry; diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index 928e00de..25a0184c 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -355,7 +355,7 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3); [[gnu::noinline]] static bool lt_sub(const uint256& x, const uint256& y) noexcept { - uint64_t carry = 0; + unsigned long long carry = 0; subc(x, y, &carry); return !!carry; } diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp index 80e250d1..6153c299 100644 --- a/test/experimental/addmod.hpp +++ b/test/experimental/addmod.hpp @@ -16,7 +16,7 @@ namespace intx::test [[maybe_unused, gnu::noinline]] static uint256 addmod_simple( const uint256& x, const uint256& y, const uint256& mod) noexcept { - uint64_t carry = 0; + unsigned long long carry = 0; uint<256 + 64> n = addc(x, y, &carry); n[4] = carry; return udivrem(n, mod).rem; @@ -28,7 +28,7 @@ namespace intx::test const auto xm = x >= mod ? x % mod : x; const auto ym = y >= mod ? y % mod : y; - uint64_t carry = 0; + unsigned long long carry = 0; auto sum = addc(xm, ym, &carry); if (carry || sum >= mod) sum -= mod; @@ -43,7 +43,7 @@ namespace intx::test // Based on https://github.com/holiman/uint256/pull/86. if ((m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3])) { - uint64_t carry = 0; + unsigned long long carry = 0; auto s = subc(x, m, &carry); if (carry) s = x; @@ -56,13 +56,13 @@ namespace intx::test carry = 0; s = addc(s, t, &carry); - uint64_t carry2 = 0; + unsigned long long carry2 = 0; t = subc(s, m, &carry2); return (carry || !carry2) ? t : s; } - uint64_t carry = 0; + unsigned long long carry = 0; uint<256 + 64> n = addc(x, y, &carry); n[4] = carry; return udivrem(n, m).rem; @@ -78,7 +78,7 @@ namespace intx::test { // Normalize x in case it is bigger than mod. auto xn = x; - uint64_t carry = 0; + unsigned long long carry = 0; const auto xd = subc(x, mod, &carry); if (!carry) xn = xd; @@ -93,14 +93,14 @@ namespace intx::test carry = 0; const auto av = addc(xn, yn, &carry); - uint64_t carry2 = 0; + unsigned long long carry2 = 0; const auto bv = subc(av, mod, &carry2); if (carry || !carry2) return bv; return av; } - uint64_t carry = 0; + unsigned long long carry = 0; uint<256 + 64> n = addc(x, y, &carry); n[4] = carry; return udivrem(n, mod).rem; diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp index 51747d8a..e5324661 100644 --- a/test/unittests/test_builtins.cpp +++ b/test/unittests/test_builtins.cpp @@ -34,7 +34,7 @@ static_assert(to_big_endian(uint64_t{0x02010f0e0d0c0b0a}) == TEST(builtins, addc) { - uint64_t carry = 0; + unsigned long long carry = 0; EXPECT_EQ(addc(0, 0, &carry), 0); EXPECT_EQ(carry, 0); EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1); @@ -43,7 +43,7 @@ TEST(builtins, addc) TEST(builtins, subc) { - uint64_t carry = 0; + unsigned long long carry = 0; EXPECT_EQ(subc(0, 0, &carry), 0); EXPECT_TRUE(!carry); EXPECT_EQ(subc(0, 1, &carry), 0xffffffffffffffff); From 4ae0f6f43e3d53a4956b4470b588ae82f012ce5c Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 2 Apr 2022 19:26:01 -0400 Subject: [PATCH 06/15] fix code formatting --- include/intx/intx.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index e3aabe12..9a338c28 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -164,7 +164,7 @@ struct result_with_carry /// @{ /// Addition with carry. `uint64_t *carry` is used as in/out parameter -inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long *carry) noexcept +inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long* carry) noexcept { #if __has_builtin(__builtin_addcll) if (!is_constant_evaluated()) @@ -190,7 +190,7 @@ inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long *carry } /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter -inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long *carry) noexcept +inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long* carry) noexcept { #if __has_builtin(__builtin_subcll) if (!is_constant_evaluated()) @@ -217,7 +217,7 @@ inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long *carry /// Addition with carry. template -inline constexpr uint addc(const uint& x, const uint& y, unsigned long long *carry) noexcept +inline constexpr uint addc(const uint& x, const uint& y, unsigned long long* carry) noexcept { uint s; for (size_t i = 0; i < uint::num_words; ++i) @@ -241,7 +241,7 @@ inline constexpr uint128 operator+(uint128 x) noexcept /// Performs subtraction of two unsigned numbers and returns the difference /// and the carry bit (aka borrow, overflow). template -inline constexpr uint subc(const uint& x, const uint& y, unsigned long long *carry) noexcept +inline constexpr uint subc(const uint& x, const uint& y, unsigned long long* carry) noexcept { uint z; for (size_t i = 0; i < uint::num_words; ++i) From fe61916bffe206f4554154680d972de28c9b78e0 Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 2 Apr 2022 19:27:05 -0400 Subject: [PATCH 07/15] Fix code formatting --- include/intx/intx.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 9a338c28..54c0c97d 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -164,7 +164,8 @@ struct result_with_carry /// @{ /// Addition with carry. `uint64_t *carry` is used as in/out parameter -inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long* carry) noexcept +inline constexpr uint64_t addc( + uint64_t x, uint64_t y, unsigned long long* carry) noexcept { #if __has_builtin(__builtin_addcll) if (!is_constant_evaluated()) @@ -190,7 +191,8 @@ inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long* carry } /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter -inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long* carry) noexcept +inline constexpr uint64_t subc( + uint64_t x, uint64_t y, unsigned long long* carry) noexcept { #if __has_builtin(__builtin_subcll) if (!is_constant_evaluated()) From 98d704f4f6b501f11327f10bf380a17255ae04b7 Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 2 Apr 2022 19:33:14 -0400 Subject: [PATCH 08/15] lint issues --- include/intx/intx.hpp | 40 +++++++++++++++++--------------- test/benchmarks/benchmarks.cpp | 2 +- test/experimental/addmod.hpp | 16 ++++++------- test/unittests/test_builtins.cpp | 4 ++-- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 54c0c97d..18d43968 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -165,7 +165,7 @@ struct result_with_carry /// Addition with carry. `uint64_t *carry` is used as in/out parameter inline constexpr uint64_t addc( - uint64_t x, uint64_t y, unsigned long long* carry) noexcept + uint64_t x, uint64_t y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int) { #if __has_builtin(__builtin_addcll) if (!is_constant_evaluated()) @@ -192,7 +192,7 @@ inline constexpr uint64_t addc( /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter inline constexpr uint64_t subc( - uint64_t x, uint64_t y, unsigned long long* carry) noexcept + uint64_t x, uint64_t y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int) { #if __has_builtin(__builtin_subcll) if (!is_constant_evaluated()) @@ -219,7 +219,8 @@ inline constexpr uint64_t subc( /// Addition with carry. template -inline constexpr uint addc(const uint& x, const uint& y, unsigned long long* carry) noexcept +inline constexpr uint addc( + const uint& x, const uint& y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int) { uint s; for (size_t i = 0; i < uint::num_words; ++i) @@ -231,7 +232,7 @@ inline constexpr uint addc(const uint& x, const uint& y, unsigned long inline constexpr uint128 operator+(uint128 x, uint128 y) noexcept { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) return addc(x, y, &carry); } @@ -243,7 +244,8 @@ inline constexpr uint128 operator+(uint128 x) noexcept /// Performs subtraction of two unsigned numbers and returns the difference /// and the carry bit (aka borrow, overflow). template -inline constexpr uint subc(const uint& x, const uint& y, unsigned long long* carry) noexcept +inline constexpr uint subc( + const uint& x, const uint& y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int) { uint z; for (size_t i = 0; i < uint::num_words; ++i) @@ -255,7 +257,7 @@ inline constexpr uint subc(const uint& x, const uint& y, unsigned long inline constexpr uint128 operator-(uint128 x, uint128 y) noexcept { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) return subc(x, y, &carry); } @@ -1127,7 +1129,7 @@ inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept template inline constexpr bool operator<(const uint& x, const uint& y) noexcept { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) subc(x, y, &carry); return !!carry; } @@ -1301,7 +1303,7 @@ inline constexpr uint operator<<(const uint& x, uint64_t shift) noexcept const auto skip = static_cast(shift / word_bits); uint r; - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) for (size_t i = 0; i < (uint::num_words - skip); ++i) { r[i + skip] = (x[i] << s) | carry; @@ -1354,7 +1356,7 @@ inline constexpr uint operator>>(const uint& x, uint64_t shift) noexcept const auto skip = static_cast(shift / word_bits); uint r; - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) for (size_t i = 0; i < (num_words - skip); ++i) { r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> s) | carry; @@ -1453,7 +1455,7 @@ inline const uint8_t* as_bytes(const T& x) noexcept template inline constexpr uint operator+(const uint& x, const uint& y) noexcept { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) return addc(x, y, &carry); } @@ -1466,7 +1468,7 @@ inline constexpr uint operator-(const uint& x) noexcept template inline constexpr uint operator-(const uint& x, const uint& y) noexcept { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) return subc(x, y, &carry); } @@ -1495,7 +1497,7 @@ inline constexpr uint<2 * N> umul(const uint& x, const uint& y) noexcept uint64_t k = 0; for (size_t i = 0; i < num_words; ++i) { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) const auto a = addc(p[i + j], k, &carry); const auto t = umul(x[i], y[j]) + uint128{a, carry}; p[i + j] = t[0]; @@ -1519,7 +1521,7 @@ inline constexpr uint operator*(const uint& x, const uint& y) noexcept uint64_t k = 0; for (size_t i = 0; i < (num_words - j - 1); i++) { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) const auto a = addc(p[i + j], k, &carry); const auto t = umul(x[i], y[j]) + uint128{a, carry}; p[i + j] = t[0]; @@ -1711,7 +1713,7 @@ inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) n // OPT: Add MinLen template parameter and unroll first loop iterations. INTX_REQUIRE(len >= 2); - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) for (int i = 0; i < len; ++i) s[i] = addc(x[i], y[i], &carry); return !!carry; @@ -1763,9 +1765,9 @@ inline void udivrem_knuth( std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal); const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat); - unsigned long long carry1 = 0; + unsigned long long carry1 = 0; // NOLINT(google-runtime-int) u[j + dlen - 2] = subc(rhat[0], overflow, &carry1); - unsigned long long carry2 = 0; + unsigned long long carry2 = 0; // NOLINT(google-runtime-int) u[j + dlen - 1] = subc(rhat[1], carry1, &carry2); if (INTX_UNLIKELY(!!carry2)) @@ -2032,7 +2034,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no { // Normalize x in case it is bigger than mod. auto xn = x; - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) const auto xd = subc(x, mod, &carry); if (!carry) xn = xd; @@ -2045,7 +2047,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no yn = yd; carry = 0; - unsigned long long carry2 = 0; + unsigned long long carry2 = 0; // NOLINT(google-runtime-int) const auto av = addc(xn, yn, &carry2); const auto bv = subc(av, mod, &carry); if (carry2 || !carry) @@ -2053,7 +2055,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no return av; } - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) const auto s = addc(x, y, &carry); uint<256 + 64> n = s; n[4] = carry; diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index 25a0184c..aefdd44a 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -355,7 +355,7 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3); [[gnu::noinline]] static bool lt_sub(const uint256& x, const uint256& y) noexcept { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) subc(x, y, &carry); return !!carry; } diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp index 6153c299..38a7048f 100644 --- a/test/experimental/addmod.hpp +++ b/test/experimental/addmod.hpp @@ -16,7 +16,7 @@ namespace intx::test [[maybe_unused, gnu::noinline]] static uint256 addmod_simple( const uint256& x, const uint256& y, const uint256& mod) noexcept { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) uint<256 + 64> n = addc(x, y, &carry); n[4] = carry; return udivrem(n, mod).rem; @@ -28,7 +28,7 @@ namespace intx::test const auto xm = x >= mod ? x % mod : x; const auto ym = y >= mod ? y % mod : y; - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) auto sum = addc(xm, ym, &carry); if (carry || sum >= mod) sum -= mod; @@ -43,7 +43,7 @@ namespace intx::test // Based on https://github.com/holiman/uint256/pull/86. if ((m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3])) { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) auto s = subc(x, m, &carry); if (carry) s = x; @@ -56,13 +56,13 @@ namespace intx::test carry = 0; s = addc(s, t, &carry); - unsigned long long carry2 = 0; + unsigned long long carry2 = 0; // NOLINT(google-runtime-int) t = subc(s, m, &carry2); return (carry || !carry2) ? t : s; } - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) uint<256 + 64> n = addc(x, y, &carry); n[4] = carry; return udivrem(n, m).rem; @@ -78,7 +78,7 @@ namespace intx::test { // Normalize x in case it is bigger than mod. auto xn = x; - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) const auto xd = subc(x, mod, &carry); if (!carry) xn = xd; @@ -93,14 +93,14 @@ namespace intx::test carry = 0; const auto av = addc(xn, yn, &carry); - unsigned long long carry2 = 0; + unsigned long long carry2 = 0; // NOLINT(google-runtime-int) const auto bv = subc(av, mod, &carry2); if (carry || !carry2) return bv; return av; } - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) uint<256 + 64> n = addc(x, y, &carry); n[4] = carry; return udivrem(n, mod).rem; diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp index e5324661..ab36f11a 100644 --- a/test/unittests/test_builtins.cpp +++ b/test/unittests/test_builtins.cpp @@ -34,7 +34,7 @@ static_assert(to_big_endian(uint64_t{0x02010f0e0d0c0b0a}) == TEST(builtins, addc) { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) EXPECT_EQ(addc(0, 0, &carry), 0); EXPECT_EQ(carry, 0); EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1); @@ -43,7 +43,7 @@ TEST(builtins, addc) TEST(builtins, subc) { - unsigned long long carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) EXPECT_EQ(subc(0, 0, &carry), 0); EXPECT_TRUE(!carry); EXPECT_EQ(subc(0, 1, &carry), 0xffffffffffffffff); From 1dbe06d8ec3075652f6ae15018dbb963dad56186 Mon Sep 17 00:00:00 2001 From: greg Date: Sun, 3 Apr 2022 11:38:18 -0400 Subject: [PATCH 09/15] add another optimization in addc --- include/intx/intx.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 18d43968..1928d6be 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -182,6 +182,11 @@ inline constexpr uint64_t addc( } #endif + if (((x | y) & (uint64_t(1) << 63)) == 0) { + const auto t = x + y + *carry; + *carry = 0; + return t; + } const auto s = x + y; const auto carry1 = s < x; const auto t = s + *carry; From ef4fa8d69a4bdf618dffeec4a3c0de83062f81e4 Mon Sep 17 00:00:00 2001 From: greg Date: Sun, 3 Apr 2022 17:53:18 -0400 Subject: [PATCH 10/15] more optimizations. Use shortcuts in comparison instead of doing full subtraction --- include/intx/intx.hpp | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 1928d6be..c6a0ffb6 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -148,18 +148,6 @@ inline constexpr bool is_constant_evaluated() noexcept } -/// Contains result of add/sub/etc with a carry flag. -template -struct result_with_carry -{ - T value; - bool carry; - - /// Conversion to tuple of references, to allow usage with std::tie(). - constexpr operator std::tuple() noexcept { return {value, carry}; } -}; - - /// Linear arithmetic operators. /// @{ @@ -182,16 +170,9 @@ inline constexpr uint64_t addc( } #endif - if (((x | y) & (uint64_t(1) << 63)) == 0) { - const auto t = x + y + *carry; - *carry = 0; - return t; - } const auto s = x + y; - const auto carry1 = s < x; const auto t = s + *carry; - const auto carry2 = t < s; - *carry = !!(carry1 || carry2); + *carry = (s < x) || (t < s); return t; } @@ -215,10 +196,8 @@ inline constexpr uint64_t subc( #endif const auto d = x - y; - const auto carry1 = x < y; const auto e = d - *carry; - const auto carry2 = d < *carry; - *carry = !!(carry1 || carry2); + *carry = (x < y) || (d < *carry); return e; } @@ -339,7 +318,7 @@ inline constexpr bool operator<(uint128 x, uint128 y) noexcept #if INTX_HAS_BUILTIN_INT128 return builtin_uint128{x} < builtin_uint128{y}; #else - return (unsigned{x[1] < y[1]} | (unsigned{x[1] == y[1]} & unsigned{x[0] < y[0]})) != 0; + return x[1] < y[1] || (x[1] == y[1] && x[0] < y[0]}; #endif } @@ -1134,9 +1113,13 @@ inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept template inline constexpr bool operator<(const uint& x, const uint& y) noexcept { - unsigned long long carry = 0; // NOLINT(google-runtime-int) - subc(x, y, &carry); - return !!carry; + for (size_t i = uint::num_words; i-- > 0; ) { + if (x[i] < y[i]) + return true; + if (x[i] > y[i]) + return false; + } + return false; } template Date: Sun, 3 Apr 2022 18:09:39 -0400 Subject: [PATCH 11/15] small speedups. --- include/intx/intx.hpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index c6a0ffb6..07b63ddd 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -1096,30 +1096,16 @@ inline constexpr bool operator!=(const T& x, const uint& y) noexcept return uint(x) != y; } -#if !defined(_MSC_VER) || _MSC_VER < 1916 // This kills MSVC 2017 compiler. -inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept -{ - auto xp = uint128{x[2], x[3]}; - auto yp = uint128{y[2], y[3]}; - if (xp == yp) - { - xp = uint128{x[0], x[1]}; - yp = uint128{y[0], y[1]}; - } - return xp < yp; -} -#endif - template inline constexpr bool operator<(const uint& x, const uint& y) noexcept { - for (size_t i = uint::num_words; i-- > 0; ) { + for (size_t i = uint::num_words; i-- > 1; ) { if (x[i] < y[i]) return true; if (x[i] > y[i]) return false; } - return false; + return x[0] < y[0]; } template Date: Sun, 3 Apr 2022 18:12:16 -0400 Subject: [PATCH 12/15] fix typo --- include/intx/intx.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 07b63ddd..f09b787e 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -318,7 +318,7 @@ inline constexpr bool operator<(uint128 x, uint128 y) noexcept #if INTX_HAS_BUILTIN_INT128 return builtin_uint128{x} < builtin_uint128{y}; #else - return x[1] < y[1] || (x[1] == y[1] && x[0] < y[0]}; + return x[1] < y[1] || (x[1] == y[1] && x[0] < y[0]); #endif } From cf27392f1434f1f2e9fcb5c4bb20e21b41cc4032 Mon Sep 17 00:00:00 2001 From: greg Date: Sun, 3 Apr 2022 18:30:27 -0400 Subject: [PATCH 13/15] faster comparison --- include/intx/intx.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index f09b787e..ba105d02 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -1100,10 +1100,8 @@ template inline constexpr bool operator<(const uint& x, const uint& y) noexcept { for (size_t i = uint::num_words; i-- > 1; ) { - if (x[i] < y[i]) - return true; - if (x[i] > y[i]) - return false; + if (x[i] != y[i]) + return x[i] < y[i]; } return x[0] < y[0]; } From 7f33f768c98c3b7080615bccbf47f5d079213a3b Mon Sep 17 00:00:00 2001 From: greg Date: Sun, 3 Apr 2022 18:45:11 -0400 Subject: [PATCH 14/15] make addc/subc branchless --- include/intx/intx.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index ba105d02..ba044885 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -172,7 +172,7 @@ inline constexpr uint64_t addc( const auto s = x + y; const auto t = s + *carry; - *carry = (s < x) || (t < s); + *carry = uint64_t(s < x) | uint64_t(t < s); return t; } @@ -197,7 +197,7 @@ inline constexpr uint64_t subc( const auto d = x - y; const auto e = d - *carry; - *carry = (x < y) || (d < *carry); + *carry = uint64_t(x < y) | uint64_t(d < *carry); return e; } From 9ec355447febf6f2a11a646cff5a48080e484707 Mon Sep 17 00:00:00 2001 From: greg Date: Sun, 3 Apr 2022 19:03:48 -0400 Subject: [PATCH 15/15] revert previous change so it remains branchless --- include/intx/intx.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index ba044885..d037f4dd 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -318,7 +318,7 @@ inline constexpr bool operator<(uint128 x, uint128 y) noexcept #if INTX_HAS_BUILTIN_INT128 return builtin_uint128{x} < builtin_uint128{y}; #else - return x[1] < y[1] || (x[1] == y[1] && x[0] < y[0]); + return (unsigned{x[1] < y[1]} | (unsigned{x[1] == y[1]} & unsigned{x[0] < y[0]})) != 0; #endif }