From e53dc061b618019fbe805e8318806f2f4ab77529 Mon Sep 17 00:00:00 2001 From: Duncan Townsend Date: Fri, 27 Feb 2026 10:34:32 +0100 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=EF=B8=8FGolf=20`sqrt`=20and=20`cb?= =?UTF-8?q?rt`=20by=20getting=20a=20better=20initial=20estimate=20and=20re?= =?UTF-8?q?moving=20an=20iteration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/clz/FixedPointMathLib.sol | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/utils/clz/FixedPointMathLib.sol b/src/utils/clz/FixedPointMathLib.sol index ba7692115..47da0618c 100644 --- a/src/utils/clz/FixedPointMathLib.sol +++ b/src/utils/clz/FixedPointMathLib.sol @@ -774,14 +774,16 @@ library FixedPointMathLib { // n = floor(log2(x)) // For x ≈ 2^n, we know sqrt(x) ≈ 2^(n/2) // We use (n+1)/2 instead of n/2 to round up slightly - // This gives a better initial approximation + // This gives a better initial approximation. This seed gives + // ε₁ = 0.0607 after one Babylonian step for all inputs. With + // ε_{n+1} ≈ ε²/2, 6 steps yield 2⁻¹⁶⁰ relative error (>128 correct + // bits). // // Formula: z = 2^((n+1)/2) = 2^(floor((n+1)/2)) // Implemented as: z = 1 << ((n+1) >> 1) z := shl(shr(1, sub(256, clz(x))), 1) - /// (x/z + z) / 2 - z := shr(1, add(z, div(x, z))) + // 6 Babylonian steps; z = (x/z + z) / 2 z := shr(1, add(z, div(x, z))) z := shr(1, add(z, div(x, z))) z := shr(1, add(z, div(x, z))) @@ -799,23 +801,25 @@ library FixedPointMathLib { /// @dev Returns the cube root of `x`, rounded down. /// Credit to bout3fiddy and pcaversaccio under AGPLv3 license: /// https://github.com/pcaversaccio/snekmate/blob/main/src/snekmate/utils/math.vy - /// Formally verified by xuwinnie: - /// https://github.com/vectorized/solady/blob/main/audits/xuwinnie-solady-cbrt-proof.pdf function cbrt(uint256 x) internal pure returns (uint256 z) { /// @solidity memory-safe-assembly assembly { - // Initial guess z = 2^ceil((log2(x) + 2) / 3). - // Since log2(x) = 255 - clz(x), the expression shl((257 - clz(x)) / 3, 1) - // computes this over-estimate. Guaranteed ≥ cbrt(x) and safe for Newton-Raphson's. - z := shl(div(sub(257, clz(x)), 3), 1) - // Newton-Raphson's. - z := div(add(add(div(x, mul(z, z)), z), z), 3) + // Initial guess z ≈ ∛(3/4) · 2𐞥 where q = ⌊(257 − clz(x)) / 3⌋. The + // multiplier 233/256 ≈ 0.909 ≈ ∛(3/4) balances the worst-case + // over/underestimate across each octave triplet (ε_over ≈ 0.445, + // ε_under ≈ −0.278), giving >85 bits of precision after 6 N-R + // iterations. The `lt(0, x)` term ensures z ≥ 1 when x > 0 (the + // `shr` can produce 0 for small `q`) + z := add(shr(8, shl(div(sub(257, clz(x)), 3), 233)), lt(0, x)) + + // 6 Newton-Raphson iterations. z := div(add(add(div(x, mul(z, z)), z), z), 3) z := div(add(add(div(x, mul(z, z)), z), z), 3) z := div(add(add(div(x, mul(z, z)), z), z), 3) z := div(add(add(div(x, mul(z, z)), z), z), 3) z := div(add(add(div(x, mul(z, z)), z), z), 3) z := div(add(add(div(x, mul(z, z)), z), z), 3) + // Round down. z := sub(z, lt(div(x, mul(z, z)), z)) } From e29ead0449fb269c9289d6204b821a292fc2f255 Mon Sep 17 00:00:00 2001 From: Duncan Townsend Date: Sat, 28 Feb 2026 10:42:54 +0100 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9A=A1=EF=B8=8FGolf=20`cbrt`=20a=20littl?= =?UTF-8?q?e=20bit=20more=20by=20cutting=20corners=20during=20initializati?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/clz/FixedPointMathLib.sol | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/clz/FixedPointMathLib.sol b/src/utils/clz/FixedPointMathLib.sol index 47da0618c..c349e0b59 100644 --- a/src/utils/clz/FixedPointMathLib.sol +++ b/src/utils/clz/FixedPointMathLib.sol @@ -808,9 +808,9 @@ library FixedPointMathLib { // multiplier 233/256 ≈ 0.909 ≈ ∛(3/4) balances the worst-case // over/underestimate across each octave triplet (ε_over ≈ 0.445, // ε_under ≈ −0.278), giving >85 bits of precision after 6 N-R - // iterations. The `lt(0, x)` term ensures z ≥ 1 when x > 0 (the + // iterations. The `add(..., 1)` term ensures z ≥ 1 when x > 0 (the // `shr` can produce 0 for small `q`) - z := add(shr(8, shl(div(sub(257, clz(x)), 3), 233)), lt(0, x)) + z := add(shr(8, shl(div(sub(257, clz(x)), 3), 233)), 1) // 6 Newton-Raphson iterations. z := div(add(add(div(x, mul(z, z)), z), z), 3)