Commit edaa0ffa authored by Brian Kessler's avatar Brian Kessler Committed by Robert Griesemer

math/big: use internal sqr on nats

Replace z.mul(x, x) calls on nats in internal code with z.sqr(x)
that employs optimized squaring routines. Benchmark results:

Exp-4                             12.9ms ± 2%  12.8ms ± 3%     ~     (p=0.165 n=10+10)
Exp2-4                            13.0ms ± 4%  12.8ms ± 2%   -2.14%  (p=0.015 n=8+9)
ModSqrt225_Tonelli-4               987µs ± 4%   989µs ± 2%     ~     (p=0.673 n=8+9)
ModSqrt224_3Mod4-4                 300µs ± 2%   301µs ± 3%     ~     (p=0.546 n=9+9)
ModSqrt5430_Tonelli-4              4.88s ± 6%   4.82s ± 5%     ~     (p=0.247 n=10+10)
ModSqrt5430_3Mod4-4                1.62s ±10%   1.57s ± 1%     ~     (p=0.094 n=9+9)
Exp3Power/0x10-4                   496ns ± 7%   426ns ± 7%  -14.21%  (p=0.000 n=10+10)
Exp3Power/0x40-4                   575ns ± 5%   470ns ± 7%  -18.20%  (p=0.000 n=9+10)
Exp3Power/0x100-4                  929ns ±19%   770ns ±10%  -17.13%  (p=0.000 n=10+10)
Exp3Power/0x400-4                 1.96µs ± 7%  1.79µs ± 5%   -8.68%  (p=0.000 n=10+10)
Exp3Power/0x1000-4                10.9µs ± 9%   7.9µs ± 5%  -28.02%  (p=0.000 n=10+10)
Exp3Power/0x4000-4                86.8µs ± 8%  67.3µs ± 8%  -22.41%  (p=0.000 n=10+10)
Exp3Power/0x10000-4                750µs ± 8%   731µs ± 1%     ~     (p=0.074 n=9+8)
Exp3Power/0x40000-4               7.07ms ± 7%  7.05ms ± 4%     ~     (p=0.931 n=9+9)
Exp3Power/0x100000-4              64.7ms ± 2%  65.6ms ± 6%     ~     (p=0.661 n=9+10)
Exp3Power/0x400000-4               577ms ± 2%   580ms ± 3%     ~     (p=0.931 n=9+9)
ProbablyPrime/n=0-4               9.08ms ±17%  9.09ms ±16%     ~     (p=0.447 n=9+10)
ProbablyPrime/n=1-4               10.8ms ± 4%  10.7ms ± 2%     ~     (p=0.243 n=10+9)
ProbablyPrime/n=5-4               18.5ms ± 3%  18.5ms ± 1%     ~     (p=0.863 n=9+9)
ProbablyPrime/n=10-4              28.6ms ± 6%  28.2ms ± 1%     ~     (p=0.050 n=9+9)
ProbablyPrime/n=20-4              48.4ms ± 4%  48.4ms ± 2%     ~     (p=0.739 n=10+10)
ProbablyPrime/Lucas-4             6.75ms ± 4%  6.75ms ± 2%     ~     (p=0.963 n=9+8)
ProbablyPrime/MillerRabinBase2-4  2.00ms ± 5%  2.00ms ± 7%     ~     (p=0.931 n=9+9)

Change-Id: Ibe9f58d11dbad25eb369faedf480b666a0250a6b
Reviewed-on: https://go-review.googlesource.com/56773Reviewed-by: default avatarRobert Griesemer <gri@golang.org>
parent 06a78b57
......@@ -991,7 +991,7 @@ func (z nat) expNN(x, y, m nat) nat {
// otherwise the arguments would alias.
var zz, r nat
for j := 0; j < w; j++ {
zz = zz.mul(z, z)
zz = zz.sqr(z)
zz, z = z, zz
if v&mask != 0 {
......@@ -1011,7 +1011,7 @@ func (z nat) expNN(x, y, m nat) nat {
v = y[i]
for j := 0; j < _W; j++ {
zz = zz.mul(z, z)
zz = zz.sqr(z)
zz, z = z, zz
if v&mask != 0 {
......@@ -1044,7 +1044,7 @@ func (z nat) expNNWindowed(x, y, m nat) nat {
powers[1] = x
for i := 2; i < 1<<n; i += 2 {
p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
*p = p.mul(*p2, *p2)
*p = p.sqr(*p2)
zz, r = zz.div(r, *p, m)
*p, r = r, *p
*p1 = p1.mul(*p, x)
......@@ -1061,22 +1061,22 @@ func (z nat) expNNWindowed(x, y, m nat) nat {
// Unrolled loop for significant performance
// gain. Use go test -bench=".*" in crypto/rsa
// to check performance before making changes.
zz = zz.mul(z, z)
zz = zz.sqr(z)
zz, z = z, zz
zz, r = zz.div(r, z, m)
z, r = r, z
zz = zz.mul(z, z)
zz = zz.sqr(z)
zz, z = z, zz
zz, r = zz.div(r, z, m)
z, r = r, z
zz = zz.mul(z, z)
zz = zz.sqr(z)
zz, z = z, zz
zz, r = zz.div(r, z, m)
z, r = r, z
zz = zz.mul(z, z)
zz = zz.sqr(z)
zz, z = z, zz
zz, r = zz.div(r, z, m)
z, r = r, z
......
......@@ -469,7 +469,7 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor {
table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
table[0].ndigits = ndigits * leafSize
} else {
table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
table[i].bbb = nat(nil).sqr(table[i-1].bbb)
table[i].ndigits = 2 * table[i-1].ndigits
}
......
......@@ -108,7 +108,7 @@ NextRandom:
continue
}
for j := uint(1); j < k; j++ {
y = y.mul(y, y)
y = y.sqr(y)
quotient, y = quotient.div(y, y, n)
if y.cmp(nm1) == 0 {
continue NextRandom
......@@ -194,7 +194,7 @@ func (n nat) probablyPrimeLucas() bool {
// If n is a non-square we expect to find a d in just a few attempts on average.
// After 40 attempts, take a moment to check if n is indeed a square.
t1 = t1.sqrt(n)
t1 = t1.mul(t1, t1)
t1 = t1.sqr(t1)
if t1.cmp(n) == 0 {
return false
}
......@@ -259,7 +259,7 @@ func (n nat) probablyPrimeLucas() bool {
t1 = t1.sub(t1, natP)
t2, vk = t2.div(vk, t1, n)
// V(k'+1) = V(2k+2) = V(k+1)² - 2.
t1 = t1.mul(vk1, vk1)
t1 = t1.sqr(vk1)
t1 = t1.add(t1, nm2)
t2, vk1 = t2.div(vk1, t1, n)
} else {
......@@ -270,7 +270,7 @@ func (n nat) probablyPrimeLucas() bool {
t1 = t1.sub(t1, natP)
t2, vk1 = t2.div(vk1, t1, n)
// V(k') = V(2k) = V(k)² - 2
t1 = t1.mul(vk, vk)
t1 = t1.sqr(vk)
t1 = t1.add(t1, nm2)
t2, vk = t2.div(vk, t1, n)
}
......@@ -312,7 +312,7 @@ func (n nat) probablyPrimeLucas() bool {
}
// k' = 2k
// V(k') = V(2k) = V(k)² - 2
t1 = t1.mul(vk, vk)
t1 = t1.sqr(vk)
t1 = t1.sub(t1, natTwo)
t2, vk = t2.div(vk, t1, n)
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment