diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index aebf1f93..d037f4dd 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -148,98 +148,76 @@ inline constexpr bool is_constant_evaluated() noexcept } -/// Contains result of add/sub/etc with a carry flag. -template -struct result_with_carry -{ - T value; - bool carry; - - /// Conversion to tuple of references, to allow usage with std::tie(). - constexpr operator std::tuple() noexcept { return {value, carry}; } -}; - - /// Linear arithmetic operators. /// @{ -/// Addition with carry. -inline constexpr result_with_carry addc( - uint64_t x, uint64_t y, bool carry = false) noexcept +/// Addition with carry. `carry` is an in/out parameter: carry-in on entry, carry-out on return. +inline constexpr uint64_t addc( + uint64_t x, uint64_t y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int) { #if __has_builtin(__builtin_addcll) if (!is_constant_evaluated()) { - unsigned long long carryout = 0; // NOLINT(google-runtime-int) - const auto s = __builtin_addcll(x, y, carry, &carryout); - static_assert(sizeof(s) == sizeof(uint64_t)); - return {s, static_cast(carryout)}; + return __builtin_addcll(x, y, *carry, carry); } #elif __has_builtin(__builtin_ia32_addcarryx_u64) if (!is_constant_evaluated()) { unsigned long long s = 0; // NOLINT(google-runtime-int) static_assert(sizeof(s) == sizeof(uint64_t)); - const auto carryout = __builtin_ia32_addcarryx_u64(carry, x, y, &s); - return {s, static_cast(carryout)}; + *carry = __builtin_ia32_addcarryx_u64((unsigned char)*carry, x, y, &s); + return s; } #endif const auto s = x + y; - const auto carry1 = s < x; - const auto t = s + carry; - const auto carry2 = t < s; - return {t, carry1 || carry2}; + const auto t = s + *carry; + *carry = uint64_t(s < x) | uint64_t(t < s); + return t; } -/// Subtraction with carry (borrow). -inline constexpr result_with_carry subc( - uint64_t x, uint64_t y, bool carry = false) noexcept +/// Subtraction with carry (borrow). 
`carry` is an in/out parameter: borrow-in on entry, borrow-out on return. +inline constexpr uint64_t subc( + uint64_t x, uint64_t y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int) { #if __has_builtin(__builtin_subcll) if (!is_constant_evaluated()) { - unsigned long long carryout = 0; // NOLINT(google-runtime-int) - const auto d = __builtin_subcll(x, y, carry, &carryout); - static_assert(sizeof(d) == sizeof(uint64_t)); - return {d, static_cast(carryout)}; + return __builtin_subcll(x, y, *carry, carry); } #elif __has_builtin(__builtin_ia32_sbb_u64) if (!is_constant_evaluated()) { unsigned long long d = 0; // NOLINT(google-runtime-int) static_assert(sizeof(d) == sizeof(uint64_t)); - const auto carryout = __builtin_ia32_sbb_u64(carry, x, y, &d); - return {d, static_cast(carryout)}; + *carry = __builtin_ia32_sbb_u64((unsigned char)*carry, x, y, &d); + return d; } #endif const auto d = x - y; - const auto carry1 = x < y; - const auto e = d - carry; - const auto carry2 = d < uint64_t{carry}; - return {e, carry1 || carry2}; + const auto e = d - *carry; + *carry = uint64_t(x < y) | uint64_t(d < *carry); + return e; } /// Addition with carry. 
template -inline constexpr result_with_carry> addc( - const uint& x, const uint& y, bool carry = false) noexcept +inline constexpr uint addc( + const uint& x, const uint& y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int) { uint s; - bool k = carry; for (size_t i = 0; i < uint::num_words; ++i) { - const auto t = addc(x[i], y[i], k); - s[i] = t.value; - k = t.carry; + s[i] = addc(x[i], y[i], carry); } - return {s, k}; + return s; } inline constexpr uint128 operator+(uint128 x, uint128 y) noexcept { - return addc(x, y).value; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + return addc(x, y, &carry); } inline constexpr uint128 operator+(uint128 x) noexcept @@ -250,23 +228,21 @@ inline constexpr uint128 operator+(uint128 x) noexcept /// Performs subtraction of two unsigned numbers and returns the difference /// and the carry bit (aka borrow, overflow). template -inline constexpr result_with_carry> subc( - const uint& x, const uint& y, bool carry = false) noexcept +inline constexpr uint subc( + const uint& x, const uint& y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int) { uint z; - bool k = carry; for (size_t i = 0; i < uint::num_words; ++i) { - const auto t = subc(x[i], y[i], k); - z[i] = t.value; - k = t.carry; + z[i] = subc(x[i], y[i], carry); } - return {z, k}; + return z; } inline constexpr uint128 operator-(uint128 x, uint128 y) noexcept { - return subc(x, y).value; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + return subc(x, y, &carry); } inline constexpr uint128 operator-(uint128 x) noexcept @@ -1120,24 +1096,14 @@ inline constexpr bool operator!=(const T& x, const uint& y) noexcept return uint(x) != y; } -#if !defined(_MSC_VER) || _MSC_VER < 1916 // This kills MSVC 2017 compiler. 
-inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept -{ - auto xp = uint128{x[2], x[3]}; - auto yp = uint128{y[2], y[3]}; - if (xp == yp) - { - xp = uint128{x[0], x[1]}; - yp = uint128{y[0], y[1]}; - } - return xp < yp; -} -#endif - template inline constexpr bool operator<(const uint& x, const uint& y) noexcept { - return subc(x, y).carry; + for (size_t i = uint::num_words; i-- > 1; ) { + if (x[i] != y[i]) + return x[i] < y[i]; + } + return x[0] < y[0]; } template operator<<(const uint& x, uint64_t shift) noexcept const auto skip = static_cast(shift / word_bits); uint r; - uint64_t carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) for (size_t i = 0; i < (uint::num_words - skip); ++i) { r[i + skip] = (x[i] << s) | carry; @@ -1362,7 +1328,7 @@ inline constexpr uint operator>>(const uint& x, uint64_t shift) noexcept const auto skip = static_cast(shift / word_bits); uint r; - uint64_t carry = 0; + unsigned long long carry = 0; // NOLINT(google-runtime-int) for (size_t i = 0; i < (num_words - skip); ++i) { r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> s) | carry; @@ -1461,7 +1427,8 @@ inline const uint8_t* as_bytes(const T& x) noexcept template inline constexpr uint operator+(const uint& x, const uint& y) noexcept { - return addc(x, y).value; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + return addc(x, y, &carry); } template @@ -1473,7 +1440,8 @@ inline constexpr uint operator-(const uint& x) noexcept template inline constexpr uint operator-(const uint& x, const uint& y) noexcept { - return subc(x, y).value; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + return subc(x, y, &carry); } template umul(const uint& x, const uint& y) noexcept uint64_t k = 0; for (size_t i = 0; i < num_words; ++i) { - const auto a = addc(p[i + j], k); - const auto t = umul(x[i], y[j]) + uint128{a.value, a.carry}; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + const auto a = addc(p[i + j], 
k, &carry); + const auto t = umul(x[i], y[j]) + uint128{a, carry}; p[i + j] = t[0]; k = t[1]; } @@ -1524,8 +1493,9 @@ inline constexpr uint operator*(const uint& x, const uint& y) noexcept uint64_t k = 0; for (size_t i = 0; i < (num_words - j - 1); i++) { - const auto a = addc(p[i + j], k); - const auto t = umul(x[i], y[j]) + uint128{a.value, a.carry}; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + const auto a = addc(p[i + j], k, &carry); + const auto t = umul(x[i], y[j]) + uint128{a, carry}; p[i + j] = t[0]; k = t[1]; } @@ -1715,10 +1685,10 @@ inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) n // OPT: Add MinLen template parameter and unroll first loop iterations. INTX_REQUIRE(len >= 2); - bool carry = false; + unsigned long long carry = 0; // NOLINT(google-runtime-int) for (int i = 0; i < len; ++i) - std::tie(s[i], carry) = addc(x[i], y[i], carry); - return carry; + s[i] = addc(x[i], y[i], &carry); + return !!carry; } /// r = x - multiplier * y. @@ -1766,12 +1736,13 @@ inline void udivrem_knuth( uint128 rhat; std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal); - bool carry{}; const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat); - std::tie(u[j + dlen - 2], carry) = subc(rhat[0], overflow); - std::tie(u[j + dlen - 1], carry) = subc(rhat[1], carry); + unsigned long long carry1 = 0; // NOLINT(google-runtime-int) + u[j + dlen - 2] = subc(rhat[0], overflow, &carry1); + unsigned long long carry2 = 0; // NOLINT(google-runtime-int) + u[j + dlen - 1] = subc(rhat[1], carry1, &carry2); - if (INTX_UNLIKELY(carry)) + if (INTX_UNLIKELY(!!carry2)) { --qhat; u[j + dlen - 1] += divisor[1] + add(&u[j], &u[j], d, dlen - 1); @@ -2035,28 +2006,31 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no { // Normalize x in case it is bigger than mod. 
auto xn = x; - const auto xd = subc(x, mod); - if (!xd.carry) - xn = xd.value; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + const auto xd = subc(x, mod, &carry); + if (!carry) + xn = xd; // Normalize y in case it is bigger than mod. auto yn = y; - const auto yd = subc(y, mod); - if (!yd.carry) - yn = yd.value; - - const auto a = addc(xn, yn); - const auto av = a.value; - const auto b = subc(av, mod); - const auto bv = b.value; - if (a.carry || !b.carry) + carry = 0; + const auto yd = subc(y, mod, &carry); + if (!carry) + yn = yd; + + carry = 0; + unsigned long long carry2 = 0; // NOLINT(google-runtime-int) + const auto av = addc(xn, yn, &carry2); + const auto bv = subc(av, mod, &carry); + if (carry2 || !carry) return bv; return av; } - const auto s = addc(x, y); - uint<256 + 64> n = s.value; - n[4] = s.carry; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + const auto s = addc(x, y, &carry); + uint<256 + 64> n = s; + n[4] = carry; return udivrem(n, mod).rem; } diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index ff37d683..aefdd44a 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -355,7 +355,9 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3); [[gnu::noinline]] static bool lt_sub(const uint256& x, const uint256& y) noexcept { - return subc(x, y).carry; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + subc(x, y, &carry); + return !!carry; } [[gnu::noinline]] static bool lt_wordcmp(const uint256& x, const uint256& y) noexcept diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp index 78c0e042..38a7048f 100644 --- a/test/experimental/addmod.hpp +++ b/test/experimental/addmod.hpp @@ -16,9 +16,9 @@ namespace intx::test [[maybe_unused, gnu::noinline]] static uint256 addmod_simple( const uint256& x, const uint256& y, const uint256& mod) noexcept { - const auto s = addc(x, y); - uint<256 + 64> n = s.value; - n[4] 
= s.carry; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + uint<256 + 64> n = addc(x, y, &carry); + n[4] = carry; return udivrem(n, mod).rem; } @@ -28,9 +28,9 @@ namespace intx::test const auto xm = x >= mod ? x % mod : x; const auto ym = y >= mod ? y % mod : y; - const auto s = addc(xm, ym); - auto sum = s.value; - if (s.carry || s.value >= mod) + unsigned long long carry = 0; // NOLINT(google-runtime-int) + auto sum = addc(xm, ym, &carry); + if (carry || sum >= mod) sum -= mod; return sum; } @@ -43,22 +43,28 @@ namespace intx::test // Based on https://github.com/holiman/uint256/pull/86. if ((m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3])) { - auto s = subc(x, m); - if (s.carry) - s.value = x; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + auto s = subc(x, m, &carry); + if (carry) + s = x; - auto t = subc(y, m); - if (t.carry) - t.value = y; + carry = 0; + auto t = subc(y, m, &carry); + if (carry) + t = y; - s = addc(s.value, t.value); - t = subc(s.value, m); - return (s.carry || !t.carry) ? t.value : s.value; + carry = 0; + s = addc(s, t, &carry); + + unsigned long long carry2 = 0; // NOLINT(google-runtime-int) + t = subc(s, m, &carry2); + + return (carry || !carry2) ? t : s; } - const auto s = addc(x, y); - uint<256 + 64> n = s.value; - n[4] = s.carry; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + uint<256 + 64> n = addc(x, y, &carry); + n[4] = carry; return udivrem(n, m).rem; } @@ -72,28 +78,31 @@ namespace intx::test { // Normalize x in case it is bigger than mod. auto xn = x; - const auto xd = subc(x, mod); - if (!xd.carry) - xn = xd.value; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + const auto xd = subc(x, mod, &carry); + if (!carry) + xn = xd; // Normalize y in case it is bigger than mod. 
auto yn = y; - const auto yd = subc(y, mod); - if (!yd.carry) - yn = yd.value; - - const auto a = addc(xn, yn); - const auto av = a.value; - const auto b = subc(av, mod); - const auto bv = b.value; - if (a.carry || !b.carry) + carry = 0; + const auto yd = subc(y, mod, &carry); + if (!carry) + yn = yd; + + carry = 0; + const auto av = addc(xn, yn, &carry); + + unsigned long long carry2 = 0; // NOLINT(google-runtime-int) + const auto bv = subc(av, mod, &carry2); + if (carry || !carry2) return bv; return av; } - const auto s = addc(x, y); - uint<256 + 64> n = s.value; - n[4] = s.carry; + unsigned long long carry = 0; // NOLINT(google-runtime-int) + uint<256 + 64> n = addc(x, y, &carry); + n[4] = carry; return udivrem(n, mod).rem; } } // namespace intx::test diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp index a11f2a5d..ab36f11a 100644 --- a/test/unittests/test_builtins.cpp +++ b/test/unittests/test_builtins.cpp @@ -31,15 +31,24 @@ static_assert(to_big_endian(uint32_t{0x0d0c0b0a}) == (is_le ? 0x0a0b0c0d : 0x0d0 static_assert(to_big_endian(uint64_t{0x02010f0e0d0c0b0a}) == (is_le ? 
0x0a0b0c0d0e0f0102 : 0x02010f0e0d0c0b0a)); -static_assert(addc(0, 0).value == 0); -static_assert(!addc(0, 0).carry); -static_assert(addc(0xffffffffffffffff, 2).value == 1); -static_assert(addc(0xffffffffffffffff, 2).carry); - -static_assert(subc(0, 0).value == 0); -static_assert(!subc(0, 0).carry); -static_assert(subc(0, 1).value == 0xffffffffffffffff); -static_assert(subc(0, 1).carry); + +TEST(builtins, addc) +{ + unsigned long long carry = 0; // NOLINT(google-runtime-int) + EXPECT_EQ(addc(0, 0, &carry), 0); + EXPECT_EQ(carry, 0); + EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1); + EXPECT_NE(carry, 0); +} + +TEST(builtins, subc) +{ + unsigned long long carry = 0; // NOLINT(google-runtime-int) + EXPECT_EQ(subc(0, 0, &carry), 0); + EXPECT_TRUE(!carry); + EXPECT_EQ(subc(0, 1, &carry), 0xffffffffffffffff); + EXPECT_TRUE(carry); +} TEST(builtins, clz64_single_one)