diff --git a/src/math/big/arith.go b/src/math/big/arith.go
index b0885f261fe9ba..e621e928f10b6d 100644
--- a/src/math/big/arith.go
+++ b/src/math/big/arith.go
@@ -214,3 +214,34 @@ func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) {
 	}
 	return
 }
+
+// divWWByInv_g returns the quotient q and remainder r of the two-word value
+// x1:x0 divided by y. inv and shift must be the values returned by
+// bits.GetInvert(uint(y)).
+//
+// NOTE(review): the quotient is assumed to fit in one word (x1 < y); this is
+// not checked here.
+func divWWByInv_g(x1, x0, y Word, inv uint, shift uint) (q, r Word) {
+	if shift != 0 {
+		// Shift dividend and divisor left so that the divisor's top bit is set
+		// before handing them to bits.DivByInv.
+		x1 = x1<<shift | x0>>(bits.UintSize-shift)
+		x0 <<= shift
+		y <<= shift
+	}
+	qq, rr := bits.DivByInv(uint(x1), uint(x0), uint(y), inv)
+	// Undo the scaling on the remainder; the quotient is unaffected by it.
+	return Word(qq), Word(rr >> shift)
+}
+
+// divWVWByInv_g divides the multi-word value xn:x by the single word y. It
+// stores the quotient words in z and returns the remainder. The reciprocal of
+// y is computed once and reused for every word of x.
+func divWVWByInv_g(z []Word, xn Word, x []Word, y Word) (r Word) {
+	inv, shift := bits.GetInvert(uint(y))
+	r = xn
+	for i := len(z) - 1; i >= 0; i-- {
+		z[i], r = divWWByInv_g(r, x[i], y, inv, shift)
+	}
+	return r
+}
diff --git a/src/math/big/arith_decl.go b/src/math/big/arith_decl.go
index 41e592334c376e..54a2d739e9407b 100644
--- a/src/math/big/arith_decl.go
+++ b/src/math/big/arith_decl.go
@@ -18,3 +18,18 @@ func shrVU(z, x []Word, s uint) (c Word)
 func mulAddVWW(z, x []Word, y, r Word) (c Word)
 func addMulVVW(z, x []Word, y Word) (c Word)
 func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
+
+// TODO: implement divWWByInv and divWVWByInv in arith_$GOARCH.s; until then
+// both forward to the portable Go implementations.
+
+// divWWByInv divides the two-word value x1:x0 by y, using the inv and shift
+// that bits.GetInvert(uint(y)) returns.
+func divWWByInv(x1, x0, y Word, inv uint, shift uint) (q, r Word) {
+	return divWWByInv_g(x1, x0, y, inv, shift)
+}
+
+// divWVWByInv divides xn:x by y, stores the quotient in z and returns the
+// remainder.
+func divWVWByInv(z []Word, xn Word, x []Word, y Word) (r Word) {
+	return divWVWByInv_g(z, xn, x, y)
+}
diff --git a/src/math/big/nat.go b/src/math/big/nat.go
index 6a3989bf9d82bf..8b2cce3ac04395 100644
--- a/src/math/big/nat.go
+++ b/src/math/big/nat.go
@@ -646,7 +646,7 @@ func (z nat) divW(x nat, y Word) (q nat, r Word) {
 	}
 	// m > 0
 	z = z.make(m)
-	r = divWVW(z, 0, x, y)
+	r = divWVWByInv(z, 0, x, y)
 	q = z.norm()
 	return
 }
@@ -751,6 +751,7 @@ func (q nat) divBasic(u, v nat) {
 
 	// D2.
 	vn1 := v[n-1]
+	inv, shift := bits.GetInvert(uint(vn1))
 	for j := m; j >= 0; j-- {
 		// D3.
 		qhat := Word(_M)
@@ -760,7 +761,7 @@ func (q nat) divBasic(u, v nat) {
 		}
 		if ujn != vn1 {
 			var rhat Word
-			qhat, rhat = divWW(ujn, u[j+n-1], vn1)
+			qhat, rhat = divWWByInv(ujn, u[j+n-1], vn1, inv, shift)
 
 			// x1 | x2 = q̂v_{n-2}
 			vn2 := v[n-2]
@@ -1179,7 +1180,7 @@ func (x nat) modW(d Word) (r Word) {
 	// TODO(agl): we don't actually need to store the q value.
 	var q nat
 	q = q.make(len(x))
-	return divWVW(q, 0, x, d)
+	return divWVWByInv(q, 0, x, d)
 }
 
 // random creates a random integer in [0..limit), using the space in z if
diff --git a/src/math/bits/bits.go b/src/math/bits/bits.go
index 879ef2da5414f5..c8c6dde04574d8 100644
--- a/src/math/bits/bits.go
+++ b/src/math/bits/bits.go
@@ -586,3 +586,86 @@ func Rem64(hi, lo, y uint64) uint64 {
 	_, rem := Div64(hi%y, lo, y)
 	return rem
 }
+
+// GetInvert returns the precomputed values that DivByInv needs in order to
+// divide by d1: shift is the number of leading zero bits of d1, and inv is a
+// reciprocal of the normalized divisor d1<<shift.
+//
+// NOTE(review): inv appears to be floor((B*B-1)/d) - B for B = 2^UintSize and
+// d = d1<<shift, computed as in mini-gmp's limb inversion; confirm against
+// the reference before relying on this.
+//
+// GetInvert panics if d1 is zero.
+func GetInvert(d1 uint) (inv uint, shift uint) {
+	if d1 == 0 {
+		panic(divideError)
+	}
+	const half = uintSize / 2
+	const mask = 1<<half - 1 // selects the low half of a word
+
+	shift = uint(LeadingZeros(d1))
+	u1 := d1 << shift // normalized divisor: top bit set
+	ul := u1 & mask
+	uh := u1 >> half
+
+	// High half of the reciprocal: divide ^u1 by the high half of the
+	// divisor, then correct the estimate (at most twice) for the low half.
+	qh := ^u1 / uh
+	r := (^u1-qh*uh)<<half | mask
+	p := qh * ul
+	if r < p {
+		qh--
+		r += u1
+		// r >= u1 means the addition above did not wrap around.
+		if r >= u1 && r < p {
+			qh--
+			r += u1
+		}
+	}
+	r -= p
+
+	// Low half of the reciprocal.
+	p = (r>>half)*qh + r
+	ql := p>>half + 1
+	r = r<<half + mask - ql*u1
+	if r >= p<<half {
+		ql--
+		r += u1
+	}
+
+	inv = qh<<half + ql
+	if r >= u1 {
+		inv++
+	}
+	return inv, shift
+}
+
+// DivByInv returns the quotient and remainder of the two-word value n2:n1
+// divided by d, using a reciprocal inv of d as returned by GetInvert.
+//
+// NOTE(review): the algorithm assumes that d has its top bit set (callers
+// must pre-shift n2:n1 and d by the shift GetInvert returns) and that
+// n2 < d, so that the quotient fits in one word; neither is checked here.
+func DivByInv(n2, n1, d, inv uint) (q uint, r uint) {
+	// (q, q0) = n2*inv + n2:n1, plus one in the high word. q is the candidate
+	// quotient; q0 is only used to detect an overestimate.
+	q, q0 := Mul(n2, inv)
+	q0, carry := Add(q0, n1, 0)
+	q, _ = Add(q, n2, carry)
+	q++
+
+	// Candidate remainder, computed modulo 2^UintSize.
+	r = n1 - q*d
+	// Step the estimate back down when r > q0. The comparison is strict, as
+	// in the Moller-Granlund algorithm and GMP's udiv_qrnnd_preinv.
+	if r > q0 {
+		q--
+		r += d
+	}
+	// Second correction: r may still be >= d.
+	if r >= d {
+		q++
+		r -= d
+	}
+	return q, r
+}