From 905f86e529c5f99e75e4f3d284ae475508723d42 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sat, 2 Apr 2022 18:53:49 -0400
Subject: [PATCH 01/15] streamline/speed up addc/subc by using in/out carry
 parameter

---
 include/intx/intx.hpp            | 136 ++++++++++++++++---------------
 test/benchmarks/benchmarks.cpp   |   4 +-
 test/experimental/addmod.hpp     |  75 +++++++++--------
 test/unittests/test_builtins.cpp |  27 ++++--
 4 files changed, 133 insertions(+), 109 deletions(-)
diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index aebf1f93..4412b109 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -163,83 +163,79 @@ struct result_with_carry
 /// Linear arithmetic operators.
 /// @{
 
-/// Addition with carry.
-inline constexpr result_with_carry<uint64_t> addc(
-    uint64_t x, uint64_t y, bool carry = false) noexcept
+/// Addition with carry. `uint64_t *carry` is used as in/out parameter
+inline constexpr uint64_t addc(
+    uint64_t x, uint64_t y, uint64_t *carry) noexcept
 {
 #if __has_builtin(__builtin_addcll)
     if (!is_constant_evaluated())
     {
-        unsigned long long carryout = 0;  // NOLINT(google-runtime-int)
-        const auto s = __builtin_addcll(x, y, carry, &carryout);
         static_assert(sizeof(s) == sizeof(uint64_t));
-        return {s, static_cast<bool>(carryout)};
+        return __builtin_addcll(x, y, *carry);
     }
 #elif __has_builtin(__builtin_ia32_addcarryx_u64)
     if (!is_constant_evaluated())
     {
         unsigned long long s = 0;  // NOLINT(google-runtime-int)
         static_assert(sizeof(s) == sizeof(uint64_t));
-        const auto carryout = __builtin_ia32_addcarryx_u64(carry, x, y, &s);
-        return {s, static_cast<bool>(carryout)};
+        *carry = __builtin_ia32_addcarryx_u64(*carry, x, y, &s);
+        return s;
     }
 #endif
 
     const auto s = x + y;
     const auto carry1 = s < x;
-    const auto t = s + carry;
+    const auto t = s + *carry;
     const auto carry2 = t < s;
-    return {t, carry1 || carry2};
+    *carry = !!(carry1 || carry2);
+    return t;
 }
 
-/// Subtraction with carry (borrow).
-inline constexpr result_with_carry<uint64_t> subc(
-    uint64_t x, uint64_t y, bool carry = false) noexcept
+/// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter
+inline constexpr uint64_t subc(
+    uint64_t x, uint64_t y, uint64_t *carry) noexcept
 {
 #if __has_builtin(__builtin_subcll)
     if (!is_constant_evaluated())
     {
-        unsigned long long carryout = 0;  // NOLINT(google-runtime-int)
-        const auto d = __builtin_subcll(x, y, carry, &carryout);
         static_assert(sizeof(d) == sizeof(uint64_t));
-        return {d, static_cast<bool>(carryout)};
+        return __builtin_subcll(x, y, *carry, carry);
     }
 #elif __has_builtin(__builtin_ia32_sbb_u64)
     if (!is_constant_evaluated())
     {
         unsigned long long d = 0;  // NOLINT(google-runtime-int)
         static_assert(sizeof(d) == sizeof(uint64_t));
-        const auto carryout = __builtin_ia32_sbb_u64(carry, x, y, &d);
-        return {d, static_cast<bool>(carryout)};
+        *carry = __builtin_ia32_sbb_u64(*carry, x, y, &d);
+        return d;
     }
 #endif
 
     const auto d = x - y;
     const auto carry1 = x < y;
-    const auto e = d - carry;
-    const auto carry2 = d < uint64_t{carry};
-    return {e, carry1 || carry2};
+    const auto e = d - *carry;
+    const auto carry2 = d < *carry;
+    *carry = !!(carry1 || carry2);
+    return e;
 }
 
 /// Addition with carry.
 template <unsigned N>
-inline constexpr result_with_carry<uint<N>> addc(
-    const uint<N>& x, const uint<N>& y, bool carry = false) noexcept
+inline constexpr uint<N> addc(
+    const uint<N>& x, const uint<N>& y, uint64_t *carry) noexcept
 {
     uint<N> s;
-    bool k = carry;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
     {
-        const auto t = addc(x[i], y[i], k);
-        s[i] = t.value;
-        k = t.carry;
+        s[i] = addc(x[i], y[i], carry);
     }
-    return {s, k};
+    return s;
 }
 
 inline constexpr uint128 operator+(uint128 x, uint128 y) noexcept
 {
-    return addc(x, y).value;
+    uint64_t carry = 0;
+    return addc(x, y, &carry);
 }
 
 inline constexpr uint128 operator+(uint128 x) noexcept
@@ -250,23 +246,21 @@ inline constexpr uint128 operator+(uint128 x) noexcept
 /// Performs subtraction of two unsigned numbers and returns the difference
 /// and the carry bit (aka borrow, overflow).
 template <unsigned N>
-inline constexpr result_with_carry<uint<N>> subc(
-    const uint<N>& x, const uint<N>& y, bool carry = false) noexcept
+inline constexpr uint<N> subc(
+    const uint<N>& x, const uint<N>& y, uint64_t *carry) noexcept
 {
     uint<N> z;
-    bool k = carry;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
     {
-        const auto t = subc(x[i], y[i], k);
-        z[i] = t.value;
-        k = t.carry;
+        z[i] = subc(x[i], y[i], carry);
     }
-    return {z, k};
+    return z;
 }
 
 inline constexpr uint128 operator-(uint128 x, uint128 y) noexcept
 {
-    return subc(x, y).value;
+    uint64_t carry = 0;
+    return subc(x, y, &carry);
 }
 
 inline constexpr uint128 operator-(uint128 x) noexcept
@@ -1137,7 +1131,9 @@ inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept
 template <unsigned N>
 inline constexpr bool operator<(const uint<N>& x, const uint<N>& y) noexcept
 {
-    return subc(x, y).carry;
+    uint64_t carry = 0;
+    subc(x, y, &carry);
+    return !!carry;
 }
 
 template <unsigned N, typename T,
@@ -1461,7 +1457,8 @@ inline const uint8_t* as_bytes(const T& x) noexcept
 template <unsigned N>
 inline constexpr uint<N> operator+(const uint<N>& x, const uint<N>& y) noexcept
 {
-    return addc(x, y).value;
+    uint64_t carry = 0;
+    return addc(x, y, &carry);
 }
 
 template <unsigned N>
@@ -1473,7 +1470,8 @@ inline constexpr uint<N> operator-(const uint<N>& x) noexcept
 template <unsigned N>
 inline constexpr uint<N> operator-(const uint<N>& x, const uint<N>& y) noexcept
 {
-    return subc(x, y).value;
+    uint64_t carry = 0;
+    return subc(x, y, &carry);
 }
 
 template <unsigned N, typename T,
@@ -1501,8 +1499,9 @@ inline constexpr uint<2 * N> umul(const uint<N>& x, const uint<N>& y) noexcept
         uint64_t k = 0;
         for (size_t i = 0; i < num_words; ++i)
         {
-            const auto a = addc(p[i + j], k);
-            const auto t = umul(x[i], y[j]) + uint128{a.value, a.carry};
+            uint64_t carry = 0;
+            const auto a = addc(p[i + j], k, &carry);
+            const auto t = umul(x[i], y[j]) + uint128{a, carry};
             p[i + j] = t[0];
             k = t[1];
         }
@@ -1524,8 +1523,9 @@ inline constexpr uint<N> operator*(const uint<N>& x, const uint<N>& y) noexcept
         uint64_t k = 0;
         for (size_t i = 0; i < (num_words - j - 1); i++)
         {
-            const auto a = addc(p[i + j], k);
-            const auto t = umul(x[i], y[j]) + uint128{a.value, a.carry};
+            uint64_t carry = 0;
+            const auto a = addc(p[i + j], k, &carry);
+            const auto t = umul(x[i], y[j]) + uint128{a, carry};
             p[i + j] = t[0];
             k = t[1];
         }
@@ -1715,10 +1715,10 @@ inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) n
     // OPT: Add MinLen template parameter and unroll first loop iterations.
     INTX_REQUIRE(len >= 2);
 
-    bool carry = false;
+    uint64_t carry = 0;
     for (int i = 0; i < len; ++i)
-        std::tie(s[i], carry) = addc(x[i], y[i], carry);
-    return carry;
+        s[i] = addc(x[i], y[i], &carry);
+    return !!carry;
 }
 
 /// r = x - multiplier * y.
@@ -1766,12 +1766,13 @@ inline void udivrem_knuth(
             uint128 rhat;
             std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal);
 
-            bool carry{};
             const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat);
-            std::tie(u[j + dlen - 2], carry) = subc(rhat[0], overflow);
-            std::tie(u[j + dlen - 1], carry) = subc(rhat[1], carry);
+            uint64_t carry1 = 0;
+            u[j + dlen - 2] = subc(rhat[0], overflow, &carry1);
+            uint64_t carry2 = 0;
+            u[j + dlen - 1] = subc(rhat[1], carry1, &carry2);
 
-            if (INTX_UNLIKELY(carry))
+            if (INTX_UNLIKELY(!!carry2))
             {
                 --qhat;
                 u[j + dlen - 1] += divisor[1] + add(&u[j], &u[j], d, dlen - 1);
@@ -2035,28 +2036,31 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no
     {
         // Normalize x in case it is bigger than mod.
         auto xn = x;
-        const auto xd = subc(x, mod);
-        if (!xd.carry)
-            xn = xd.value;
+        uint64_t carry = 0;
+        const auto xd = subc(x, mod, &carry);
+        if (!carry)
+            xn = xd;
 
         // Normalize y in case it is bigger than mod.
         auto yn = y;
-        const auto yd = subc(y, mod);
-        if (!yd.carry)
-            yn = yd.value;
-
-        const auto a = addc(xn, yn);
-        const auto av = a.value;
-        const auto b = subc(av, mod);
-        const auto bv = b.value;
-        if (a.carry || !b.carry)
+        carry = 0;
+        const auto yd = subc(y, mod, &carry);
+        if (!carry)
+            yn = yd;
+
+        carry = 0;
+        uint64_t carry2 = 0;
+        const auto av = addc(xn, yn, &carry2);
+        const auto bv = subc(av, mod, &carry);
+        if (carry2 || !carry)
             return bv;
         return av;
     }
 
-    const auto s = addc(x, y);
-    uint<256 + 64> n = s.value;
-    n[4] = s.carry;
+    uint64_t carry = 0;
+    const auto s = addc(x, y, &carry);
+    uint<256 + 64> n = s;
+    n[4] = carry;
     return udivrem(n, mod).rem;
 }
 
diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp
index ff37d683..928e00de 100644
--- a/test/benchmarks/benchmarks.cpp
+++ b/test/benchmarks/benchmarks.cpp
@@ -355,7 +355,9 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3);
 
 [[gnu::noinline]] static bool lt_sub(const uint256& x, const uint256& y) noexcept
 {
-    return subc(x, y).carry;
+    uint64_t carry = 0;
+    subc(x, y, &carry);
+    return !!carry;
 }
 
 [[gnu::noinline]] static bool lt_wordcmp(const uint256& x, const uint256& y) noexcept
diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp
index 78c0e042..80e250d1 100644
--- a/test/experimental/addmod.hpp
+++ b/test/experimental/addmod.hpp
@@ -16,9 +16,9 @@ namespace intx::test
 [[maybe_unused, gnu::noinline]] static uint256 addmod_simple(
     const uint256& x, const uint256& y, const uint256& mod) noexcept
 {
-    const auto s = addc(x, y);
-    uint<256 + 64> n = s.value;
-    n[4] = s.carry;
+    uint64_t carry = 0;
+    uint<256 + 64> n = addc(x, y, &carry);
+    n[4] = carry;
     return udivrem(n, mod).rem;
 }
 
@@ -28,9 +28,9 @@ namespace intx::test
     const auto xm = x >= mod ? x % mod : x;
     const auto ym = y >= mod ? y % mod : y;
 
-    const auto s = addc(xm, ym);
-    auto sum = s.value;
-    if (s.carry || s.value >= mod)
+    uint64_t carry = 0;
+    auto sum = addc(xm, ym, &carry);
+    if (carry || sum >= mod)
         sum -= mod;
     return sum;
 }
@@ -43,22 +43,28 @@ namespace intx::test
     // Based on https://github.com/holiman/uint256/pull/86.
     if ((m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3]))
     {
-        auto s = subc(x, m);
-        if (s.carry)
-            s.value = x;
+        uint64_t carry = 0;
+        auto s = subc(x, m, &carry);
+        if (carry)
+            s = x;
 
-        auto t = subc(y, m);
-        if (t.carry)
-            t.value = y;
+        carry = 0;
+        auto t = subc(y, m, &carry);
+        if (carry)
+            t = y;
 
-        s = addc(s.value, t.value);
-        t = subc(s.value, m);
-        return (s.carry || !t.carry) ? t.value : s.value;
+        carry = 0;
+        s = addc(s, t, &carry);
+
+        uint64_t carry2 = 0;
+        t = subc(s, m, &carry2);
+
+        return (carry || !carry2) ? t : s;
     }
 
-    const auto s = addc(x, y);
-    uint<256 + 64> n = s.value;
-    n[4] = s.carry;
+    uint64_t carry = 0;
+    uint<256 + 64> n = addc(x, y, &carry);
+    n[4] = carry;
     return udivrem(n, m).rem;
 }
 
@@ -72,28 +78,31 @@ namespace intx::test
     {
         // Normalize x in case it is bigger than mod.
         auto xn = x;
-        const auto xd = subc(x, mod);
-        if (!xd.carry)
-            xn = xd.value;
+        uint64_t carry = 0;
+        const auto xd = subc(x, mod, &carry);
+        if (!carry)
+            xn = xd;
 
         // Normalize y in case it is bigger than mod.
         auto yn = y;
-        const auto yd = subc(y, mod);
-        if (!yd.carry)
-            yn = yd.value;
-
-        const auto a = addc(xn, yn);
-        const auto av = a.value;
-        const auto b = subc(av, mod);
-        const auto bv = b.value;
-        if (a.carry || !b.carry)
+        carry = 0;
+        const auto yd = subc(y, mod, &carry);
+        if (!carry)
+            yn = yd;
+
+        carry = 0;
+        const auto av = addc(xn, yn, &carry);
+
+        uint64_t carry2 = 0;
+        const auto bv = subc(av, mod, &carry2);
+        if (carry || !carry2)
             return bv;
         return av;
     }
 
-    const auto s = addc(x, y);
-    uint<256 + 64> n = s.value;
-    n[4] = s.carry;
+    uint64_t carry = 0;
+    uint<256 + 64> n = addc(x, y, &carry);
+    n[4] = carry;
     return udivrem(n, mod).rem;
 }
 }  // namespace intx::test
diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp
index a11f2a5d..545f88a7 100644
--- a/test/unittests/test_builtins.cpp
+++ b/test/unittests/test_builtins.cpp
@@ -31,15 +31,24 @@ static_assert(to_big_endian(uint32_t{0x0d0c0b0a}) == (is_le ? 0x0a0b0c0d : 0x0d0
 static_assert(to_big_endian(uint64_t{0x02010f0e0d0c0b0a}) ==
               (is_le ? 0x0a0b0c0d0e0f0102 : 0x02010f0e0d0c0b0a));
 
-static_assert(addc(0, 0).value == 0);
-static_assert(!addc(0, 0).carry);
-static_assert(addc(0xffffffffffffffff, 2).value == 1);
-static_assert(addc(0xffffffffffffffff, 2).carry);
-
-static_assert(subc(0, 0).value == 0);
-static_assert(!subc(0, 0).carry);
-static_assert(subc(0, 1).value == 0xffffffffffffffff);
-static_assert(subc(0, 1).carry);
+
+TEST(builtins, addc)
+{
+    uint64_t carry = 0;
+    EXPECT_EQ(addc(0, 0, &carry), 0);
+    EXPECT_EQ(carry, 0);
+    EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1);
+    EXPECT_NE(carry,  0);
+}
+
+TEST(builtins, subc)
+{
+    uint64_t carry = 0;
+    EXPECT_EQ(subc(0, 0, &carry), 0);
+    EXPECT_TRUE(!carry);
+    EXPECT_EQ(subc(0, 1, &carry), 0xffffffffffffffff);
+    EXPECT_TRUE(carry);
+}
 
 
 TEST(builtins, clz64_single_one)

From ebe54df36e95bf568215e9ce24c4ecfc03e01460 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sat, 2 Apr 2022 18:59:39 -0400
Subject: [PATCH 02/15] add cast for clang build.

---
 include/intx/intx.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index 4412b109..4fb26be1 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -178,7 +178,7 @@ inline constexpr uint64_t addc(
     {
         unsigned long long s = 0;  // NOLINT(google-runtime-int)
         static_assert(sizeof(s) == sizeof(uint64_t));
-        *carry = __builtin_ia32_addcarryx_u64(*carry, x, y, &s);
+        *carry = __builtin_ia32_addcarryx_u64((unsigned char)*carry, x, y, &s);
         return s;
     }
 #endif
@@ -206,7 +206,7 @@ inline constexpr uint64_t subc(
     {
         unsigned long long d = 0;  // NOLINT(google-runtime-int)
         static_assert(sizeof(d) == sizeof(uint64_t));
-        *carry = __builtin_ia32_sbb_u64(*carry, x, y, &d);
+        *carry = __builtin_ia32_sbb_u64((unsigned char)*carry, x, y, &d);
         return d;
     }
 #endif

From 9074a9e74a41dbdeda76a84146be74b7e868e81a Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sat, 2 Apr 2022 19:03:26 -0400
Subject: [PATCH 03/15] implement some code formatting suggestions

---
 include/intx/intx.hpp            | 12 ++++--------
 test/unittests/test_builtins.cpp |  2 +-
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index 4fb26be1..fcf86df3 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -164,8 +164,7 @@ struct result_with_carry
 /// @{
 
 /// Addition with carry. `uint64_t *carry` is used as in/out parameter
-inline constexpr uint64_t addc(
-    uint64_t x, uint64_t y, uint64_t *carry) noexcept
+inline constexpr uint64_t addc(uint64_t x, uint64_t y, uint64_t *carry) noexcept
 {
 #if __has_builtin(__builtin_addcll)
     if (!is_constant_evaluated())
@@ -192,8 +191,7 @@ inline constexpr uint64_t addc(
 }
 
 /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter
-inline constexpr uint64_t subc(
-    uint64_t x, uint64_t y, uint64_t *carry) noexcept
+inline constexpr uint64_t subc(uint64_t x, uint64_t y, uint64_t *carry) noexcept
 {
 #if __has_builtin(__builtin_subcll)
     if (!is_constant_evaluated())
@@ -221,8 +219,7 @@ inline constexpr uint64_t subc(
 
 /// Addition with carry.
 template <unsigned N>
-inline constexpr uint<N> addc(
-    const uint<N>& x, const uint<N>& y, uint64_t *carry) noexcept
+inline constexpr uint<N> addc(const uint<N>& x, const uint<N>& y, uint64_t *carry) noexcept
 {
     uint<N> s;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
@@ -246,8 +243,7 @@ inline constexpr uint128 operator+(uint128 x) noexcept
 /// Performs subtraction of two unsigned numbers and returns the difference
 /// and the carry bit (aka borrow, overflow).
 template <unsigned N>
-inline constexpr uint<N> subc(
-    const uint<N>& x, const uint<N>& y, uint64_t *carry) noexcept
+inline constexpr uint<N> subc(const uint<N>& x, const uint<N>& y, uint64_t *carry) noexcept
 {
     uint<N> z;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp
index 545f88a7..51747d8a 100644
--- a/test/unittests/test_builtins.cpp
+++ b/test/unittests/test_builtins.cpp
@@ -38,7 +38,7 @@ TEST(builtins, addc)
     EXPECT_EQ(addc(0, 0, &carry), 0);
     EXPECT_EQ(carry, 0);
     EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1);
-    EXPECT_NE(carry,  0);
+    EXPECT_NE(carry, 0);
 }
 
 TEST(builtins, subc)

From ba889d0b284017e3f931059b9f5358eec8cf454a Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sat, 2 Apr 2022 19:12:15 -0400
Subject: [PATCH 04/15] Fix __builtin_addcll call and drop stale static_asserts

---
 include/intx/intx.hpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index fcf86df3..e9ae0047 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -169,8 +169,7 @@ inline constexpr uint64_t addc(uint64_t x, uint64_t y, uint64_t *carry) noexcept
 #if __has_builtin(__builtin_addcll)
     if (!is_constant_evaluated())
     {
-        static_assert(sizeof(s) == sizeof(uint64_t));
-        return __builtin_addcll(x, y, *carry);
+        return __builtin_addcll(x, y, *carry, carry);
     }
 #elif __has_builtin(__builtin_ia32_addcarryx_u64)
     if (!is_constant_evaluated())
@@ -196,7 +195,6 @@ inline constexpr uint64_t subc(uint64_t x, uint64_t y, uint64_t *carry) noexcept
 #if __has_builtin(__builtin_subcll)
     if (!is_constant_evaluated())
     {
-        static_assert(sizeof(d) == sizeof(uint64_t));
         return __builtin_subcll(x, y, *carry, carry);
     }
 #elif __has_builtin(__builtin_ia32_sbb_u64)

From 183afb2aa67b2c9f9ffcfa4e27b53bef9cdfc306 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sat, 2 Apr 2022 19:23:23 -0400
Subject: [PATCH 05/15] fix clang build issues

---
 include/intx/intx.hpp            | 38 ++++++++++++++++----------------
 test/benchmarks/benchmarks.cpp   |  2 +-
 test/experimental/addmod.hpp     | 16 +++++++-------
 test/unittests/test_builtins.cpp |  4 ++--
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index e9ae0047..e3aabe12 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -164,7 +164,7 @@ struct result_with_carry
 /// @{
 
 /// Addition with carry. `uint64_t *carry` is used as in/out parameter
-inline constexpr uint64_t addc(uint64_t x, uint64_t y, uint64_t *carry) noexcept
+inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long *carry) noexcept
 {
 #if __has_builtin(__builtin_addcll)
     if (!is_constant_evaluated())
@@ -190,7 +190,7 @@ inline constexpr uint64_t addc(uint64_t x, uint64_t y, uint64_t *carry) noexcept
 }
 
 /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter
-inline constexpr uint64_t subc(uint64_t x, uint64_t y, uint64_t *carry) noexcept
+inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long *carry) noexcept
 {
 #if __has_builtin(__builtin_subcll)
     if (!is_constant_evaluated())
@@ -217,7 +217,7 @@ inline constexpr uint64_t subc(uint64_t x, uint64_t y, uint64_t *carry) noexcept
 
 /// Addition with carry.
 template <unsigned N>
-inline constexpr uint<N> addc(const uint<N>& x, const uint<N>& y, uint64_t *carry) noexcept
+inline constexpr uint<N> addc(const uint<N>& x, const uint<N>& y, unsigned long long *carry) noexcept
 {
     uint<N> s;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
@@ -229,7 +229,7 @@ inline constexpr uint<N> addc(const uint<N>& x, const uint<N>& y, uint64_t *carr
 
 inline constexpr uint128 operator+(uint128 x, uint128 y) noexcept
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     return addc(x, y, &carry);
 }
 
@@ -241,7 +241,7 @@ inline constexpr uint128 operator+(uint128 x) noexcept
 /// Performs subtraction of two unsigned numbers and returns the difference
 /// and the carry bit (aka borrow, overflow).
 template <unsigned N>
-inline constexpr uint<N> subc(const uint<N>& x, const uint<N>& y, uint64_t *carry) noexcept
+inline constexpr uint<N> subc(const uint<N>& x, const uint<N>& y, unsigned long long *carry) noexcept
 {
     uint<N> z;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
@@ -253,7 +253,7 @@ inline constexpr uint<N> subc(const uint<N>& x, const uint<N>& y, uint64_t *carr
 
 inline constexpr uint128 operator-(uint128 x, uint128 y) noexcept
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     return subc(x, y, &carry);
 }
 
@@ -1125,7 +1125,7 @@ inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept
 template <unsigned N>
 inline constexpr bool operator<(const uint<N>& x, const uint<N>& y) noexcept
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     subc(x, y, &carry);
     return !!carry;
 }
@@ -1299,7 +1299,7 @@ inline constexpr uint<N> operator<<(const uint<N>& x, uint64_t shift) noexcept
     const auto skip = static_cast<size_t>(shift / word_bits);
 
     uint<N> r;
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     for (size_t i = 0; i < (uint<N>::num_words - skip); ++i)
     {
         r[i + skip] = (x[i] << s) | carry;
@@ -1352,7 +1352,7 @@ inline constexpr uint<N> operator>>(const uint<N>& x, uint64_t shift) noexcept
     const auto skip = static_cast<size_t>(shift / word_bits);
 
     uint<N> r;
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     for (size_t i = 0; i < (num_words - skip); ++i)
     {
         r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> s) | carry;
@@ -1451,7 +1451,7 @@ inline const uint8_t* as_bytes(const T& x) noexcept
 template <unsigned N>
 inline constexpr uint<N> operator+(const uint<N>& x, const uint<N>& y) noexcept
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     return addc(x, y, &carry);
 }
 
@@ -1464,7 +1464,7 @@ inline constexpr uint<N> operator-(const uint<N>& x) noexcept
 template <unsigned N>
 inline constexpr uint<N> operator-(const uint<N>& x, const uint<N>& y) noexcept
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     return subc(x, y, &carry);
 }
 
@@ -1493,7 +1493,7 @@ inline constexpr uint<2 * N> umul(const uint<N>& x, const uint<N>& y) noexcept
         uint64_t k = 0;
         for (size_t i = 0; i < num_words; ++i)
         {
-            uint64_t carry = 0;
+            unsigned long long carry = 0;
             const auto a = addc(p[i + j], k, &carry);
             const auto t = umul(x[i], y[j]) + uint128{a, carry};
             p[i + j] = t[0];
@@ -1517,7 +1517,7 @@ inline constexpr uint<N> operator*(const uint<N>& x, const uint<N>& y) noexcept
         uint64_t k = 0;
         for (size_t i = 0; i < (num_words - j - 1); i++)
         {
-            uint64_t carry = 0;
+            unsigned long long carry = 0;
             const auto a = addc(p[i + j], k, &carry);
             const auto t = umul(x[i], y[j]) + uint128{a, carry};
             p[i + j] = t[0];
@@ -1709,7 +1709,7 @@ inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) n
     // OPT: Add MinLen template parameter and unroll first loop iterations.
     INTX_REQUIRE(len >= 2);
 
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     for (int i = 0; i < len; ++i)
         s[i] = addc(x[i], y[i], &carry);
     return !!carry;
@@ -1761,9 +1761,9 @@ inline void udivrem_knuth(
             std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal);
 
             const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat);
-            uint64_t carry1 = 0;
+            unsigned long long carry1 = 0;
             u[j + dlen - 2] = subc(rhat[0], overflow, &carry1);
-            uint64_t carry2 = 0;
+            unsigned long long carry2 = 0;
             u[j + dlen - 1] = subc(rhat[1], carry1, &carry2);
 
             if (INTX_UNLIKELY(!!carry2))
@@ -2030,7 +2030,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no
     {
         // Normalize x in case it is bigger than mod.
         auto xn = x;
-        uint64_t carry = 0;
+        unsigned long long carry = 0;
         const auto xd = subc(x, mod, &carry);
         if (!carry)
             xn = xd;
@@ -2043,7 +2043,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no
             yn = yd;
 
         carry = 0;
-        uint64_t carry2 = 0;
+        unsigned long long carry2 = 0;
         const auto av = addc(xn, yn, &carry2);
         const auto bv = subc(av, mod, &carry);
         if (carry2 || !carry)
@@ -2051,7 +2051,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no
         return av;
     }
 
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     const auto s = addc(x, y, &carry);
     uint<256 + 64> n = s;
     n[4] = carry;
diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp
index 928e00de..25a0184c 100644
--- a/test/benchmarks/benchmarks.cpp
+++ b/test/benchmarks/benchmarks.cpp
@@ -355,7 +355,7 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3);
 
 [[gnu::noinline]] static bool lt_sub(const uint256& x, const uint256& y) noexcept
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     subc(x, y, &carry);
     return !!carry;
 }
diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp
index 80e250d1..6153c299 100644
--- a/test/experimental/addmod.hpp
+++ b/test/experimental/addmod.hpp
@@ -16,7 +16,7 @@ namespace intx::test
 [[maybe_unused, gnu::noinline]] static uint256 addmod_simple(
     const uint256& x, const uint256& y, const uint256& mod) noexcept
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     uint<256 + 64> n = addc(x, y, &carry);
     n[4] = carry;
     return udivrem(n, mod).rem;
@@ -28,7 +28,7 @@ namespace intx::test
     const auto xm = x >= mod ? x % mod : x;
     const auto ym = y >= mod ? y % mod : y;
 
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     auto sum = addc(xm, ym, &carry);
     if (carry || sum >= mod)
         sum -= mod;
@@ -43,7 +43,7 @@ namespace intx::test
     // Based on https://github.com/holiman/uint256/pull/86.
     if ((m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3]))
     {
-        uint64_t carry = 0;
+        unsigned long long carry = 0;
         auto s = subc(x, m, &carry);
         if (carry)
             s = x;
@@ -56,13 +56,13 @@ namespace intx::test
         carry = 0;
         s = addc(s, t, &carry);
 
-        uint64_t carry2 = 0;
+        unsigned long long carry2 = 0;
         t = subc(s, m, &carry2);
 
         return (carry || !carry2) ? t : s;
     }
 
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     uint<256 + 64> n = addc(x, y, &carry);
     n[4] = carry;
     return udivrem(n, m).rem;
@@ -78,7 +78,7 @@ namespace intx::test
     {
         // Normalize x in case it is bigger than mod.
         auto xn = x;
-        uint64_t carry = 0;
+        unsigned long long carry = 0;
         const auto xd = subc(x, mod, &carry);
         if (!carry)
             xn = xd;
@@ -93,14 +93,14 @@ namespace intx::test
         carry = 0;
         const auto av = addc(xn, yn, &carry);
 
-        uint64_t carry2 = 0;
+        unsigned long long carry2 = 0;
         const auto bv = subc(av, mod, &carry2);
         if (carry || !carry2)
             return bv;
         return av;
     }
 
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     uint<256 + 64> n = addc(x, y, &carry);
     n[4] = carry;
     return udivrem(n, mod).rem;
diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp
index 51747d8a..e5324661 100644
--- a/test/unittests/test_builtins.cpp
+++ b/test/unittests/test_builtins.cpp
@@ -34,7 +34,7 @@ static_assert(to_big_endian(uint64_t{0x02010f0e0d0c0b0a}) ==
 
 TEST(builtins, addc)
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     EXPECT_EQ(addc(0, 0, &carry), 0);
     EXPECT_EQ(carry, 0);
     EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1);
@@ -43,7 +43,7 @@ TEST(builtins, addc)
 
 TEST(builtins, subc)
 {
-    uint64_t carry = 0;
+    unsigned long long carry = 0;
     EXPECT_EQ(subc(0, 0, &carry), 0);
     EXPECT_TRUE(!carry);
     EXPECT_EQ(subc(0, 1, &carry), 0xffffffffffffffff);

From 4ae0f6f43e3d53a4956b4470b588ae82f012ce5c Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sat, 2 Apr 2022 19:26:01 -0400
Subject: [PATCH 06/15] fix code formatting

---
 include/intx/intx.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index e3aabe12..9a338c28 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -164,7 +164,7 @@ struct result_with_carry
 /// @{
 
 /// Addition with carry. `uint64_t *carry` is used as in/out parameter
-inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long *carry) noexcept
+inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long* carry) noexcept
 {
 #if __has_builtin(__builtin_addcll)
     if (!is_constant_evaluated())
@@ -190,7 +190,7 @@ inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long *carry
 }
 
 /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter
-inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long *carry) noexcept
+inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long* carry) noexcept
 {
 #if __has_builtin(__builtin_subcll)
     if (!is_constant_evaluated())
@@ -217,7 +217,7 @@ inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long *carry
 
 /// Addition with carry.
 template <unsigned N>
-inline constexpr uint<N> addc(const uint<N>& x, const uint<N>& y, unsigned long long *carry) noexcept
+inline constexpr uint<N> addc(const uint<N>& x, const uint<N>& y, unsigned long long* carry) noexcept
 {
     uint<N> s;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
@@ -241,7 +241,7 @@ inline constexpr uint128 operator+(uint128 x) noexcept
 /// Performs subtraction of two unsigned numbers and returns the difference
 /// and the carry bit (aka borrow, overflow).
 template <unsigned N>
-inline constexpr uint<N> subc(const uint<N>& x, const uint<N>& y, unsigned long long *carry) noexcept
+inline constexpr uint<N> subc(const uint<N>& x, const uint<N>& y, unsigned long long* carry) noexcept
 {
     uint<N> z;
     for (size_t i = 0; i < uint<N>::num_words; ++i)

From fe61916bffe206f4554154680d972de28c9b78e0 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sat, 2 Apr 2022 19:27:05 -0400
Subject: [PATCH 07/15] Fix code formatting

---
 include/intx/intx.hpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index 9a338c28..54c0c97d 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -164,7 +164,8 @@ struct result_with_carry
 /// @{
 
 /// Addition with carry. `uint64_t *carry` is used as in/out parameter
-inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long* carry) noexcept
+inline constexpr uint64_t addc(
+    uint64_t x, uint64_t y, unsigned long long* carry) noexcept
 {
 #if __has_builtin(__builtin_addcll)
     if (!is_constant_evaluated())
@@ -190,7 +191,8 @@ inline constexpr uint64_t addc(uint64_t x, uint64_t y, unsigned long long* carry
 }
 
 /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter
-inline constexpr uint64_t subc(uint64_t x, uint64_t y, unsigned long long* carry) noexcept
+inline constexpr uint64_t subc(
+    uint64_t x, uint64_t y, unsigned long long* carry) noexcept
 {
 #if __has_builtin(__builtin_subcll)
     if (!is_constant_evaluated())

From 98d704f4f6b501f11327f10bf380a17255ae04b7 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sat, 2 Apr 2022 19:33:14 -0400
Subject: [PATCH 08/15] Suppress google-runtime-int lint warnings

---
 include/intx/intx.hpp            | 40 +++++++++++++++++---------------
 test/benchmarks/benchmarks.cpp   |  2 +-
 test/experimental/addmod.hpp     | 16 ++++++-------
 test/unittests/test_builtins.cpp |  4 ++--
 4 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index 54c0c97d..18d43968 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -165,7 +165,7 @@ struct result_with_carry
 
 /// Addition with carry. `uint64_t *carry` is used as in/out parameter
 inline constexpr uint64_t addc(
-    uint64_t x, uint64_t y, unsigned long long* carry) noexcept
+    uint64_t x, uint64_t y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int)
 {
 #if __has_builtin(__builtin_addcll)
     if (!is_constant_evaluated())
@@ -192,7 +192,7 @@ inline constexpr uint64_t addc(
 
 /// Subtraction with carry (borrow). `uint64_t *carry` is used as in/out parameter
 inline constexpr uint64_t subc(
-    uint64_t x, uint64_t y, unsigned long long* carry) noexcept
+    uint64_t x, uint64_t y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int)
 {
 #if __has_builtin(__builtin_subcll)
     if (!is_constant_evaluated())
@@ -219,7 +219,8 @@ inline constexpr uint64_t subc(
 
 /// Addition with carry.
 template <unsigned N>
-inline constexpr uint<N> addc(const uint<N>& x, const uint<N>& y, unsigned long long* carry) noexcept
+inline constexpr uint<N> addc(
+    const uint<N>& x, const uint<N>& y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int)
 {
     uint<N> s;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
@@ -231,7 +232,7 @@ inline constexpr uint<N> addc(const uint<N>& x, const uint<N>& y, unsigned long
 
 inline constexpr uint128 operator+(uint128 x, uint128 y) noexcept
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     return addc(x, y, &carry);
 }
 
@@ -243,7 +244,8 @@ inline constexpr uint128 operator+(uint128 x) noexcept
 /// Performs subtraction of two unsigned numbers and returns the difference
 /// and the carry bit (aka borrow, overflow).
 template <unsigned N>
-inline constexpr uint<N> subc(const uint<N>& x, const uint<N>& y, unsigned long long* carry) noexcept
+inline constexpr uint<N> subc(
+    const uint<N>& x, const uint<N>& y, unsigned long long* carry) noexcept // NOLINT(google-runtime-int)
 {
     uint<N> z;
     for (size_t i = 0; i < uint<N>::num_words; ++i)
@@ -255,7 +257,7 @@ inline constexpr uint<N> subc(const uint<N>& x, const uint<N>& y, unsigned long
 
 inline constexpr uint128 operator-(uint128 x, uint128 y) noexcept
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     return subc(x, y, &carry);
 }
 
@@ -1127,7 +1129,7 @@ inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept
 template <unsigned N>
 inline constexpr bool operator<(const uint<N>& x, const uint<N>& y) noexcept
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     subc(x, y, &carry);
     return !!carry;
 }
@@ -1301,7 +1303,7 @@ inline constexpr uint<N> operator<<(const uint<N>& x, uint64_t shift) noexcept
     const auto skip = static_cast<size_t>(shift / word_bits);
 
     uint<N> r;
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     for (size_t i = 0; i < (uint<N>::num_words - skip); ++i)
     {
         r[i + skip] = (x[i] << s) | carry;
@@ -1354,7 +1356,7 @@ inline constexpr uint<N> operator>>(const uint<N>& x, uint64_t shift) noexcept
     const auto skip = static_cast<size_t>(shift / word_bits);
 
     uint<N> r;
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     for (size_t i = 0; i < (num_words - skip); ++i)
     {
         r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> s) | carry;
@@ -1453,7 +1455,7 @@ inline const uint8_t* as_bytes(const T& x) noexcept
 template <unsigned N>
 inline constexpr uint<N> operator+(const uint<N>& x, const uint<N>& y) noexcept
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     return addc(x, y, &carry);
 }
 
@@ -1466,7 +1468,7 @@ inline constexpr uint<N> operator-(const uint<N>& x) noexcept
 template <unsigned N>
 inline constexpr uint<N> operator-(const uint<N>& x, const uint<N>& y) noexcept
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     return subc(x, y, &carry);
 }
 
@@ -1495,7 +1497,7 @@ inline constexpr uint<2 * N> umul(const uint<N>& x, const uint<N>& y) noexcept
         uint64_t k = 0;
         for (size_t i = 0; i < num_words; ++i)
         {
-            unsigned long long carry = 0;
+            unsigned long long carry = 0; // NOLINT(google-runtime-int)
             const auto a = addc(p[i + j], k, &carry);
             const auto t = umul(x[i], y[j]) + uint128{a, carry};
             p[i + j] = t[0];
@@ -1519,7 +1521,7 @@ inline constexpr uint<N> operator*(const uint<N>& x, const uint<N>& y) noexcept
         uint64_t k = 0;
         for (size_t i = 0; i < (num_words - j - 1); i++)
         {
-            unsigned long long carry = 0;
+            unsigned long long carry = 0; // NOLINT(google-runtime-int)
             const auto a = addc(p[i + j], k, &carry);
             const auto t = umul(x[i], y[j]) + uint128{a, carry};
             p[i + j] = t[0];
@@ -1711,7 +1713,7 @@ inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) n
     // OPT: Add MinLen template parameter and unroll first loop iterations.
     INTX_REQUIRE(len >= 2);
 
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     for (int i = 0; i < len; ++i)
         s[i] = addc(x[i], y[i], &carry);
     return !!carry;
@@ -1763,9 +1765,9 @@ inline void udivrem_knuth(
             std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal);
 
             const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat);
-            unsigned long long carry1 = 0;
+            unsigned long long carry1 = 0; // NOLINT(google-runtime-int)
             u[j + dlen - 2] = subc(rhat[0], overflow, &carry1);
-            unsigned long long carry2 = 0;
+            unsigned long long carry2 = 0; // NOLINT(google-runtime-int)
             u[j + dlen - 1] = subc(rhat[1], carry1, &carry2);
 
             if (INTX_UNLIKELY(!!carry2))
@@ -2032,7 +2034,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no
     {
         // Normalize x in case it is bigger than mod.
         auto xn = x;
-        unsigned long long carry = 0;
+        unsigned long long carry = 0; // NOLINT(google-runtime-int)
         const auto xd = subc(x, mod, &carry);
         if (!carry)
             xn = xd;
@@ -2045,7 +2047,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no
             yn = yd;
 
         carry = 0;
-        unsigned long long carry2 = 0;
+        unsigned long long carry2 = 0; // NOLINT(google-runtime-int)
         const auto av = addc(xn, yn, &carry2);
         const auto bv = subc(av, mod, &carry);
         if (carry2 || !carry)
@@ -2053,7 +2055,7 @@ inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) no
         return av;
     }
 
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     const auto s = addc(x, y, &carry);
     uint<256 + 64> n = s;
     n[4] = carry;
diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp
index 25a0184c..aefdd44a 100644
--- a/test/benchmarks/benchmarks.cpp
+++ b/test/benchmarks/benchmarks.cpp
@@ -355,7 +355,7 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3);
 
 [[gnu::noinline]] static bool lt_sub(const uint256& x, const uint256& y) noexcept
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     subc(x, y, &carry);
     return !!carry;
 }
diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp
index 6153c299..38a7048f 100644
--- a/test/experimental/addmod.hpp
+++ b/test/experimental/addmod.hpp
@@ -16,7 +16,7 @@ namespace intx::test
 [[maybe_unused, gnu::noinline]] static uint256 addmod_simple(
     const uint256& x, const uint256& y, const uint256& mod) noexcept
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     uint<256 + 64> n = addc(x, y, &carry);
     n[4] = carry;
     return udivrem(n, mod).rem;
@@ -28,7 +28,7 @@ namespace intx::test
     const auto xm = x >= mod ? x % mod : x;
     const auto ym = y >= mod ? y % mod : y;
 
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     auto sum = addc(xm, ym, &carry);
     if (carry || sum >= mod)
         sum -= mod;
@@ -43,7 +43,7 @@ namespace intx::test
     // Based on https://github.com/holiman/uint256/pull/86.
     if ((m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3]))
     {
-        unsigned long long carry = 0;
+        unsigned long long carry = 0; // NOLINT(google-runtime-int)
         auto s = subc(x, m, &carry);
         if (carry)
             s = x;
@@ -56,13 +56,13 @@ namespace intx::test
         carry = 0;
         s = addc(s, t, &carry);
 
-        unsigned long long carry2 = 0;
+        unsigned long long carry2 = 0; // NOLINT(google-runtime-int)
         t = subc(s, m, &carry2);
 
         return (carry || !carry2) ? t : s;
     }
 
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     uint<256 + 64> n = addc(x, y, &carry);
     n[4] = carry;
     return udivrem(n, m).rem;
@@ -78,7 +78,7 @@ namespace intx::test
     {
         // Normalize x in case it is bigger than mod.
         auto xn = x;
-        unsigned long long carry = 0;
+        unsigned long long carry = 0; // NOLINT(google-runtime-int)
         const auto xd = subc(x, mod, &carry);
         if (!carry)
             xn = xd;
@@ -93,14 +93,14 @@ namespace intx::test
         carry = 0;
         const auto av = addc(xn, yn, &carry);
 
-        unsigned long long carry2 = 0;
+        unsigned long long carry2 = 0; // NOLINT(google-runtime-int)
         const auto bv = subc(av, mod, &carry2);
         if (carry || !carry2)
             return bv;
         return av;
     }
 
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     uint<256 + 64> n = addc(x, y, &carry);
     n[4] = carry;
     return udivrem(n, mod).rem;
diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp
index e5324661..ab36f11a 100644
--- a/test/unittests/test_builtins.cpp
+++ b/test/unittests/test_builtins.cpp
@@ -34,7 +34,7 @@ static_assert(to_big_endian(uint64_t{0x02010f0e0d0c0b0a}) ==
 
 TEST(builtins, addc)
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     EXPECT_EQ(addc(0, 0, &carry), 0);
     EXPECT_EQ(carry, 0);
     EXPECT_EQ(addc(0xffffffffffffffff, 2, &carry), 1);
@@ -43,7 +43,7 @@ TEST(builtins, addc)
 
 TEST(builtins, subc)
 {
-    unsigned long long carry = 0;
+    unsigned long long carry = 0; // NOLINT(google-runtime-int)
     EXPECT_EQ(subc(0, 0, &carry), 0);
     EXPECT_TRUE(!carry);
     EXPECT_EQ(subc(0, 1, &carry), 0xffffffffffffffff);

From 1dbe06d8ec3075652f6ae15018dbb963dad56186 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sun, 3 Apr 2022 11:38:18 -0400
Subject: [PATCH 09/15] add another optimization in addc

---
 include/intx/intx.hpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index 18d43968..1928d6be 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -182,6 +182,11 @@ inline constexpr uint64_t addc(
     }
 #endif
 
+    if (((x | y) & (uint64_t(1) << 63)) == 0) {
+        const auto t = x + y + *carry;
+        *carry = 0;
+        return t;
+    }
     const auto s = x + y;
     const auto carry1 = s < x;
     const auto t = s + *carry;

From ef4fa8d69a4bdf618dffeec4a3c0de83062f81e4 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sun, 3 Apr 2022 17:53:18 -0400
Subject: [PATCH 10/15] more optimizations. Use shortcuts in comparison instead
 of doing full subtraction

---
 include/intx/intx.hpp | 37 ++++++++++---------------------------
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index 1928d6be..c6a0ffb6 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -148,18 +148,6 @@ inline constexpr bool is_constant_evaluated() noexcept
 }
 
 
-/// Contains result of add/sub/etc with a carry flag.
-template <typename T>
-struct result_with_carry
-{
-    T value;
-    bool carry;
-
-    /// Conversion to tuple of references, to allow usage with std::tie().
-    constexpr operator std::tuple<T&, bool&>() noexcept { return {value, carry}; }
-};
-
-
 /// Linear arithmetic operators.
 /// @{
 
@@ -182,16 +170,9 @@ inline constexpr uint64_t addc(
     }
 #endif
 
-    if (((x | y) & (uint64_t(1) << 63)) == 0) {
-        const auto t = x + y + *carry;
-        *carry = 0;
-        return t;
-    }
     const auto s = x + y;
-    const auto carry1 = s < x;
     const auto t = s + *carry;
-    const auto carry2 = t < s;
-    *carry = !!(carry1 || carry2);
+    *carry = (s < x) || (t < s);
     return t;
 }
 
@@ -215,10 +196,8 @@ inline constexpr uint64_t subc(
 #endif
 
     const auto d = x - y;
-    const auto carry1 = x < y;
     const auto e = d - *carry;
-    const auto carry2 = d < *carry;
-    *carry = !!(carry1 || carry2);
+    *carry = (x < y) || (d < *carry);
     return e;
 }
 
@@ -339,7 +318,7 @@ inline constexpr bool operator<(uint128 x, uint128 y) noexcept
 #if INTX_HAS_BUILTIN_INT128
     return builtin_uint128{x} < builtin_uint128{y};
 #else
-    return (unsigned{x[1] < y[1]} | (unsigned{x[1] == y[1]} & unsigned{x[0] < y[0]})) != 0;
+    return x[1] < y[1] || (x[1] == y[1] && x[0] < y[0]};
 #endif
 }
 
@@ -1134,9 +1113,13 @@ inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept
 template <unsigned N>
 inline constexpr bool operator<(const uint<N>& x, const uint<N>& y) noexcept
 {
-    unsigned long long carry = 0; // NOLINT(google-runtime-int)
-    subc(x, y, &carry);
-    return !!carry;
+    for (size_t i = uint<N>::num_words; i-- > 0; ) {
+        if (x[i] < y[i])
+            return true;
+        if (x[i] > y[i])
+            return false;
+    }
+    return false;
 }
 
 template <unsigned N, typename T,

From 7fab2e00e70697c5caaef3414502d53b1313df24 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sun, 3 Apr 2022 18:09:39 -0400
Subject: [PATCH 11/15] Speed up operator<: drop uint256 overload, peel word 0

---
 include/intx/intx.hpp | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index c6a0ffb6..07b63ddd 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -1096,30 +1096,16 @@ inline constexpr bool operator!=(const T& x, const uint<N>& y) noexcept
     return uint<N>(x) != y;
 }
 
-#if !defined(_MSC_VER) || _MSC_VER < 1916  // This kills MSVC 2017 compiler.
-inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept
-{
-    auto xp = uint128{x[2], x[3]};
-    auto yp = uint128{y[2], y[3]};
-    if (xp == yp)
-    {
-        xp = uint128{x[0], x[1]};
-        yp = uint128{y[0], y[1]};
-    }
-    return xp < yp;
-}
-#endif
-
 template <unsigned N>
 inline constexpr bool operator<(const uint<N>& x, const uint<N>& y) noexcept
 {
-    for (size_t i = uint<N>::num_words; i-- > 0; ) {
+    for (size_t i = uint<N>::num_words; i-- > 1; ) {
         if (x[i] < y[i])
             return true;
         if (x[i] > y[i])
             return false;
     }
-    return false;
+    return x[0] < y[0];
 }
 
 template <unsigned N, typename T,

From e8528f4b78e3b6c60c5818b6cc98e919c30f86d8 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sun, 3 Apr 2022 18:12:16 -0400
Subject: [PATCH 12/15] fix typo

---
 include/intx/intx.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index 07b63ddd..f09b787e 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -318,7 +318,7 @@ inline constexpr bool operator<(uint128 x, uint128 y) noexcept
 #if INTX_HAS_BUILTIN_INT128
     return builtin_uint128{x} < builtin_uint128{y};
 #else
-    return x[1] < y[1] || (x[1] == y[1] && x[0] < y[0]};
+    return x[1] < y[1] || (x[1] == y[1] && x[0] < y[0]);
 #endif
 }
 

From cf27392f1434f1f2e9fcb5c4bb20e21b41cc4032 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sun, 3 Apr 2022 18:30:27 -0400
Subject: [PATCH 13/15] faster comparison

---
 include/intx/intx.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index f09b787e..ba105d02 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -1100,10 +1100,8 @@ template <unsigned N>
 inline constexpr bool operator<(const uint<N>& x, const uint<N>& y) noexcept
 {
     for (size_t i = uint<N>::num_words; i-- > 1; ) {
-        if (x[i] < y[i])
-            return true;
-        if (x[i] > y[i])
-            return false;
+        if (x[i] != y[i])
+            return x[i] < y[i];
     }
     return x[0] < y[0];
 }

From 7f33f768c98c3b7080615bccbf47f5d079213a3b Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sun, 3 Apr 2022 18:45:11 -0400
Subject: [PATCH 14/15] make addc/subc branchless

---
 include/intx/intx.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index ba105d02..ba044885 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -172,7 +172,7 @@ inline constexpr uint64_t addc(
 
     const auto s = x + y;
     const auto t = s + *carry;
-    *carry = (s < x) || (t < s);
+    *carry = uint64_t(s < x) | uint64_t(t < s);
     return t;
 }
 
@@ -197,7 +197,7 @@ inline constexpr uint64_t subc(
 
     const auto d = x - y;
     const auto e = d - *carry;
-    *carry = (x < y) || (d < *carry);
+    *carry = uint64_t(x < y) | uint64_t(d < *carry);
     return e;
 }
 

From 9ec355447febf6f2a11a646cff5a48080e484707 Mon Sep 17 00:00:00 2001
From: greg <greg7mdp@gmail.com>
Date: Sun, 3 Apr 2022 19:03:48 -0400
Subject: [PATCH 15/15] Restore branchless uint128 operator< fallback

---
 include/intx/intx.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index ba044885..d037f4dd 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -318,7 +318,7 @@ inline constexpr bool operator<(uint128 x, uint128 y) noexcept
 #if INTX_HAS_BUILTIN_INT128
     return builtin_uint128{x} < builtin_uint128{y};
 #else
-    return x[1] < y[1] || (x[1] == y[1] && x[0] < y[0]);
+    return (unsigned{x[1] < y[1]} | (unsigned{x[1] == y[1]} & unsigned{x[0] < y[0]})) != 0;
 #endif
 }