@@ -7,10 +7,10 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
77 * @notice Cryptography module
88 *
99 * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
10- * this is the most efficient implementation out there, consuming ~8.1 million gas per call.
10+ * this is the most efficient implementation out there, consuming ~8.025 million gas per call.
1111 *
12- * The approach is Strauss-Shamir double scalar multiplication with 4 bits of precompute + affine coordinates.
13- * For reference, naive implementation uses ~400 billion gas, which is 48000 times more expensive.
12+ * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
13+ * For reference, a naive implementation uses ~400 billion gas, which is 50000 times more expensive.
1414 *
1515 * We also tried using projective coordinates, however, the gas consumption rose to ~9 million gas.
1616 */
@@ -119,8 +119,8 @@ library ECDSA384 {
119119 {
120120 uint256 three = U384.init (3 );
121121
122- /// We use 4 -bit masks where the first 2 bits refer to `scalar1` and the last 2 bits refer to `scalar2`.
123- uint256 [2 ][16 ] memory points_ = _precomputePointsTable (
122+ /// We use 6-bit masks where the upper 3 bits refer to `scalar1` and the lower 3 bits refer to `scalar2`.
123+ uint256 [2 ][64 ] memory points_ = _precomputePointsTable (
124124 call,
125125 params_.p,
126126 three,
@@ -142,6 +142,8 @@ library ECDSA384 {
142142 );
143143 }
144144
145+ U384.modAssign (call, scalar1, params_.n);
146+
145147 return U384.eq (scalar1, inputs_.r);
146148 }
147149 }
@@ -185,7 +187,7 @@ library ECDSA384 {
185187 uint256 p ,
186188 uint256 three ,
187189 uint256 a ,
188- uint256 [2 ][16 ] memory points ,
190+ uint256 [2 ][64 ] memory points ,
189191 uint256 scalar1 ,
190192 uint256 scalar2
191193 ) private view returns (uint256 x , uint256 y ) {
@@ -199,15 +201,32 @@ library ECDSA384 {
199201 scalar2Bits_ := mload (scalar2)
200202 }
201203
202- for (uint256 word = 2 ; word <= 184 ; word += 2 ) {
203- (x, y) = _qaudAffine (call, p, three, a, x, y);
204+ (x, y) = _twiceAffine (call, p, three, a, x, y);
205+
206+ mask_ = ((scalar1Bits_ >> 183 ) << 3 ) | (scalar2Bits_ >> 183 );
207+
208+ if (mask_ != 0 ) {
209+ (x, y) = _addAffine (call, p, three, a, points[mask_][0 ], points[mask_][1 ], x, y);
210+ }
211+
212+ for (uint256 word = 4 ; word <= 184 ; word += 3 ) {
213+ (x, y) = _twice3Affine (call, p, three, a, x, y);
204214
205215 mask_ =
206- (((scalar1Bits_ >> (184 - word)) & 0x03 ) << 2 ) |
207- ((scalar2Bits_ >> (184 - word)) & 0x03 );
216+ (((scalar1Bits_ >> (184 - word)) & 0x07 ) << 3 ) |
217+ ((scalar2Bits_ >> (184 - word)) & 0x07 );
208218
209219 if (mask_ != 0 ) {
210- (x, y) = _addAffine (call, p, points[mask_][0 ], points[mask_][1 ], x, y);
220+ (x, y) = _addAffine (
221+ call,
222+ p,
223+ three,
224+ a,
225+ points[mask_][0 ],
226+ points[mask_][1 ],
227+ x,
228+ y
229+ );
211230 }
212231 }
213232
@@ -216,15 +235,32 @@ library ECDSA384 {
216235 scalar2Bits_ := mload (add (scalar2, 0x20 ))
217236 }
218237
219- for (uint256 word = 2 ; word <= 256 ; word += 2 ) {
220- (x, y) = _qaudAffine (call, p, three, a, x, y);
238+ (x, y) = _twiceAffine (call, p, three, a, x, y);
239+
240+ mask_ = ((scalar1Bits_ >> 255 ) << 3 ) | (scalar2Bits_ >> 255 );
241+
242+ if (mask_ != 0 ) {
243+ (x, y) = _addAffine (call, p, three, a, points[mask_][0 ], points[mask_][1 ], x, y);
244+ }
245+
246+ for (uint256 word = 4 ; word <= 256 ; word += 3 ) {
247+ (x, y) = _twice3Affine (call, p, three, a, x, y);
221248
222249 mask_ =
223- (((scalar1Bits_ >> (256 - word)) & 0x03 ) << 2 ) |
224- ((scalar2Bits_ >> (256 - word)) & 0x03 );
250+ (((scalar1Bits_ >> (256 - word)) & 0x07 ) << 3 ) |
251+ ((scalar2Bits_ >> (256 - word)) & 0x07 );
225252
226253 if (mask_ != 0 ) {
227- (x, y) = _addAffine (call, p, points[mask_][0 ], points[mask_][1 ], x, y);
254+ (x, y) = _addAffine (
255+ call,
256+ p,
257+ three,
258+ a,
259+ points[mask_][0 ],
260+ points[mask_][1 ],
261+ x,
262+ y
263+ );
228264 }
229265 }
230266 }
@@ -268,9 +304,9 @@ library ECDSA384 {
268304 }
269305
270306 /**
271- * @dev Quads an elliptic curve point in affine coordinates.
307+ * @dev Doubles an elliptic curve point 3 times in affine coordinates.
272308 */
273- function _qaudAffine (
309+ function _twice3Affine (
274310 uint256 call ,
275311 uint256 p ,
276312 uint256 three ,
@@ -321,7 +357,24 @@ library ECDSA384 {
321357 U384.modmulAssign (call, y1, m1);
322358 U384.modsubAssign (y1, y2, p);
323359
324- return (x1, y1);
360+ if (U384.eqInteger (y1, 0 )) {
361+ return (0 , 0 );
362+ }
363+
364+ U384.modexpAssignTo (call, m1, x1, 2 );
365+ U384.modmulAssign (call, m1, three);
366+ U384.modaddAssign (m1, a, p);
367+
368+ U384.modshl1AssignTo (m2, y1, p);
369+ U384.moddivAssign (call, m1, m2);
370+
371+ U384.modexpAssignTo (call, x2, m1, 2 );
372+ U384.modsubAssign (x2, x1, p);
373+ U384.modsubAssign (x2, x1, p);
374+
375+ U384.modsubAssignTo (y2, x1, x2, p);
376+ U384.modmulAssign (call, y2, m1);
377+ U384.modsubAssign (y2, y1, p);
325378 }
326379 }
327380
@@ -331,6 +384,8 @@ library ECDSA384 {
331384 function _addAffine (
332385 uint256 call ,
333386 uint256 p ,
387+ uint256 three ,
388+ uint256 a ,
334389 uint256 x1 ,
335390 uint256 y1 ,
336391 uint256 x2 ,
@@ -346,6 +401,10 @@ library ECDSA384 {
346401 }
347402
348403 if (U384.eq (x1, x2)) {
404+ if (U384.eq (y1, y2)) {
405+ return _twiceAffine (call, p, three, a, x1, y1);
406+ }
407+
349408 return (0 , 0 );
350409 }
351410
@@ -373,128 +432,49 @@ library ECDSA384 {
373432 uint256 gy ,
374433 uint256 hx ,
375434 uint256 hy
376- ) private view returns (uint256 [2 ][16 ] memory points_ ) {
377- /// 0b0100: 1G + 0H
378- (points_[0x04 ][0 ], points_[0x04 ][1 ]) = (gx.copy (), gy.copy ());
379- /// 0b1000: 2G + 0H
380- (points_[0x08 ][0 ], points_[0x08 ][1 ]) = _twiceAffine (
381- call,
382- p,
383- three,
384- a,
385- points_[0x04 ][0 ],
386- points_[0x04 ][1 ]
387- );
388- /// 0b1100: 3G + 0H
389- (points_[0x0C ][0 ], points_[0x0C ][1 ]) = _addAffine (
390- call,
391- p,
392- points_[0x04 ][0 ],
393- points_[0x04 ][1 ],
394- points_[0x08 ][0 ],
395- points_[0x08 ][1 ]
396- );
397- /// 0b0001: 0G + 1H
398- (points_[0x01 ][0 ], points_[0x01 ][1 ]) = (hx.copy (), hy.copy ());
399- /// 0b0010: 0G + 2H
400- (points_[0x02 ][0 ], points_[0x02 ][1 ]) = _twiceAffine (
401- call,
402- p,
403- three,
404- a,
405- points_[0x01 ][0 ],
406- points_[0x01 ][1 ]
407- );
408- /// 0b0011: 0G + 3H
409- (points_[0x03 ][0 ], points_[0x03 ][1 ]) = _addAffine (
410- call,
411- p,
412- points_[0x01 ][0 ],
413- points_[0x01 ][1 ],
414- points_[0x02 ][0 ],
415- points_[0x02 ][1 ]
416- );
417- /// 0b0101: 1G + 1H
418- (points_[0x05 ][0 ], points_[0x05 ][1 ]) = _addAffine (
419- call,
420- p,
421- points_[0x04 ][0 ],
422- points_[0x04 ][1 ],
423- points_[0x01 ][0 ],
424- points_[0x01 ][1 ]
425- );
426- /// 0b0110: 1G + 2H
427- (points_[0x06 ][0 ], points_[0x06 ][1 ]) = _addAffine (
428- call,
429- p,
430- points_[0x04 ][0 ],
431- points_[0x04 ][1 ],
432- points_[0x02 ][0 ],
433- points_[0x02 ][1 ]
434- );
435- /// 0b0111: 1G + 3H
436- (points_[0x07 ][0 ], points_[0x07 ][1 ]) = _addAffine (
437- call,
438- p,
439- points_[0x04 ][0 ],
440- points_[0x04 ][1 ],
441- points_[0x03 ][0 ],
442- points_[0x03 ][1 ]
443- );
444- /// 0b1001: 2G + 1H
445- (points_[0x09 ][0 ], points_[0x09 ][1 ]) = _addAffine (
446- call,
447- p,
448- points_[0x08 ][0 ],
449- points_[0x08 ][1 ],
450- points_[0x01 ][0 ],
451- points_[0x01 ][1 ]
452- );
453- /// 0b1010: 2G + 2H
454- (points_[0x0A ][0 ], points_[0x0A ][1 ]) = _addAffine (
455- call,
456- p,
457- points_[0x08 ][0 ],
458- points_[0x08 ][1 ],
459- points_[0x02 ][0 ],
460- points_[0x02 ][1 ]
461- );
462- /// 0b1011: 2G + 3H
463- (points_[0x0B ][0 ], points_[0x0B ][1 ]) = _addAffine (
464- call,
465- p,
466- points_[0x08 ][0 ],
467- points_[0x08 ][1 ],
468- points_[0x03 ][0 ],
469- points_[0x03 ][1 ]
470- );
471- /// 0b1101: 3G + 1H
472- (points_[0x0D ][0 ], points_[0x0D ][1 ]) = _addAffine (
473- call,
474- p,
475- points_[0x0C ][0 ],
476- points_[0x0C ][1 ],
477- points_[0x01 ][0 ],
478- points_[0x01 ][1 ]
479- );
480- /// 0b1110: 3G + 2H
481- (points_[0x0E ][0 ], points_[0x0E ][1 ]) = _addAffine (
482- call,
483- p,
484- points_[0x0C ][0 ],
485- points_[0x0C ][1 ],
486- points_[0x02 ][0 ],
487- points_[0x02 ][1 ]
488- );
489- /// 0b1111: 3G + 3H
490- (points_[0x0F ][0 ], points_[0x0F ][1 ]) = _addAffine (
491- call,
492- p,
493- points_[0x0C ][0 ],
494- points_[0x0C ][1 ],
495- points_[0x03 ][0 ],
496- points_[0x03 ][1 ]
497- );
435+ ) private view returns (uint256 [2 ][64 ] memory points_ ) {
436+ unchecked {
437+ (points_[0x01 ][0 ], points_[0x01 ][1 ]) = (hx.copy (), hy.copy ());
438+ (points_[0x08 ][0 ], points_[0x08 ][1 ]) = (gx.copy (), gy.copy ());
439+
440+ for (uint256 i = 0 ; i < 8 ; ++ i) {
441+ for (uint256 j = 0 ; j < 8 ; ++ j) {
442+ if (i + j < 2 ) {
443+ continue ;
444+ }
445+
446+ uint256 maskTo = (i << 3 ) | j;
447+
448+ if (i != 0 ) {
449+ uint256 maskFrom = ((i - 1 ) << 3 ) | j;
450+
451+ (points_[maskTo][0 ], points_[maskTo][1 ]) = _addAffine (
452+ call,
453+ p,
454+ three,
455+ a,
456+ points_[maskFrom][0 ],
457+ points_[maskFrom][1 ],
458+ gx,
459+ gy
460+ );
461+ } else {
462+ uint256 maskFrom = (i << 3 ) | (j - 1 );
463+
464+ (points_[maskTo][0 ], points_[maskTo][1 ]) = _addAffine (
465+ call,
466+ p,
467+ three,
468+ a,
469+ points_[maskFrom][0 ],
470+ points_[maskFrom][1 ],
471+ hx,
472+ hy
473+ );
474+ }
475+ }
476+ }
477+ }
498478 }
499479}
500480
@@ -657,6 +637,21 @@ library U384 {
657637 }
658638 }
659639
640+ function modAssign (uint256 call_ , uint256 a_ , uint256 m_ ) internal view { // In-place reduction: overwrites the 64 bytes at `a_` with `a_ mod m_`, computed as a_^1 mod m_ via the precompile at address 0x5 (modexp); `call_` points to the scratch buffer where the 0x100-byte precompile input is assembled.
641+ assembly {
642+ mstore (call_, 0x40 ) // input word 0: base length = 64 bytes
643+ mstore (add (0x20 , call_), 0x20 ) // input word 1: exponent length = 32 bytes
644+ mstore (add (0x40 , call_), 0x40 ) // input word 2: modulus length = 64 bytes
645+ mstore (add (0x60 , call_), mload (a_)) // base: first 32-byte word of a_
646+ mstore (add (0x80 , call_), mload (add (a_, 0x20 ))) // base: second 32-byte word of a_
647+ mstore (add (0xA0 , call_), 0x01 ) // exponent = 1, so the result is simply a_ mod m_
648+ mstore (add (0xC0 , call_), mload (m_)) // modulus: first 32-byte word of m_
649+ mstore (add (0xE0 , call_), mload (add (m_, 0x20 ))) // modulus: second 32-byte word of m_
650+
651+ pop (staticcall (gas (), 0x5 , call_, 0x0100 , a_, 0x40 )) // 0x100 bytes of input; the 64-byte output is written directly over a_. NOTE(review): the success flag is discarded, so a failed call would not revert here — confirm this is intended.
652+ }
653+ }
654+
660655 function modexp (
661656 uint256 call_ ,
662657 uint256 b_ ,
0 commit comments