Commit 187afdeb authored by Aliaksandr Valialkin's avatar Aliaksandr Valialkin Committed by Brad Fitzpatrick

math/big: re-use memory in Int.GCD

This improves TLS handshake performance.

benchmark                                 old ns/op     new ns/op     delta
BenchmarkGCD10x10/WithoutXY-4             965           968           +0.31%
BenchmarkGCD10x10/WithXY-4                1813          1391          -23.28%
BenchmarkGCD10x100/WithoutXY-4            1093          1075          -1.65%
BenchmarkGCD10x100/WithXY-4               2348          1676          -28.62%
BenchmarkGCD10x1000/WithoutXY-4           1569          1565          -0.25%
BenchmarkGCD10x1000/WithXY-4              4262          3242          -23.93%
BenchmarkGCD10x10000/WithoutXY-4          6069          6066          -0.05%
BenchmarkGCD10x10000/WithXY-4             12123         11331         -6.53%
BenchmarkGCD10x100000/WithoutXY-4         52664         52610         -0.10%
BenchmarkGCD10x100000/WithXY-4            97494         95649         -1.89%
BenchmarkGCD100x100/WithoutXY-4           5244          5228          -0.31%
BenchmarkGCD100x100/WithXY-4              22572         18630         -17.46%
BenchmarkGCD100x1000/WithoutXY-4          6143          6233          +1.47%
BenchmarkGCD100x1000/WithXY-4             24652         19357         -21.48%
BenchmarkGCD100x10000/WithoutXY-4         15725         15804         +0.50%
BenchmarkGCD100x10000/WithXY-4            60552         55973         -7.56%
BenchmarkGCD100x100000/WithoutXY-4        107008        107853        +0.79%
BenchmarkGCD100x100000/WithXY-4           349597        340994        -2.46%
BenchmarkGCD1000x1000/WithoutXY-4         63785         64434         +1.02%
BenchmarkGCD1000x1000/WithXY-4            373186        334035        -10.49%
BenchmarkGCD1000x10000/WithoutXY-4        78038         78241         +0.26%
BenchmarkGCD1000x10000/WithXY-4           543692        507034        -6.74%
BenchmarkGCD1000x100000/WithoutXY-4       205607        207727        +1.03%
BenchmarkGCD1000x100000/WithXY-4          2488113       2415323       -2.93%
BenchmarkGCD10000x10000/WithoutXY-4       1731340       1714992       -0.94%
BenchmarkGCD10000x10000/WithXY-4          10601046      7111329       -32.92%
BenchmarkGCD10000x100000/WithoutXY-4      2239155       2212173       -1.21%
BenchmarkGCD10000x100000/WithXY-4         30097040      26538887      -11.82%
BenchmarkGCD100000x100000/WithoutXY-4     119845326     119863916     +0.02%
BenchmarkGCD100000x100000/WithXY-4        768006543     426795966     -44.43%

benchmark                                 old allocs     new allocs     delta
BenchmarkGCD10x10/WithoutXY-4             5              5              +0.00%
BenchmarkGCD10x10/WithXY-4                17             9              -47.06%
BenchmarkGCD10x100/WithoutXY-4            6              6              +0.00%
BenchmarkGCD10x100/WithXY-4               21             9              -57.14%
BenchmarkGCD10x1000/WithoutXY-4           6              6              +0.00%
BenchmarkGCD10x1000/WithXY-4              30             12             -60.00%
BenchmarkGCD10x10000/WithoutXY-4          6              6              +0.00%
BenchmarkGCD10x10000/WithXY-4             26             12             -53.85%
BenchmarkGCD10x100000/WithoutXY-4         6              6              +0.00%
BenchmarkGCD10x100000/WithXY-4            28             12             -57.14%
BenchmarkGCD100x100/WithoutXY-4           5              5              +0.00%
BenchmarkGCD100x100/WithXY-4              183            61             -66.67%
BenchmarkGCD100x1000/WithoutXY-4          8              8              +0.00%
BenchmarkGCD100x1000/WithXY-4             170            47             -72.35%
BenchmarkGCD100x10000/WithoutXY-4         8              8              +0.00%
BenchmarkGCD100x10000/WithXY-4            200            67             -66.50%
BenchmarkGCD100x100000/WithoutXY-4        8              8              +0.00%
BenchmarkGCD100x100000/WithXY-4           188            65             -65.43%
BenchmarkGCD1000x1000/WithoutXY-4         5              5              +0.00%
BenchmarkGCD1000x1000/WithXY-4            2435           1193           -51.01%
BenchmarkGCD1000x10000/WithoutXY-4        8              8              +0.00%
BenchmarkGCD1000x10000/WithXY-4           2211           1076           -51.33%
BenchmarkGCD1000x100000/WithoutXY-4       8              8              +0.00%
BenchmarkGCD1000x100000/WithXY-4          2271           1108           -51.21%
BenchmarkGCD10000x10000/WithoutXY-4       5              5              +0.00%
BenchmarkGCD10000x10000/WithXY-4          23183          11605          -49.94%
BenchmarkGCD10000x100000/WithoutXY-4      8              8              +0.00%
BenchmarkGCD10000x100000/WithXY-4         23421          11717          -49.97%
BenchmarkGCD100000x100000/WithoutXY-4     5              5              +0.00%
BenchmarkGCD100000x100000/WithXY-4        232976         116815         -49.86%

benchmark                                 old bytes      new bytes     delta
BenchmarkGCD10x10/WithoutXY-4             208            208           +0.00%
BenchmarkGCD10x10/WithXY-4                736            432           -41.30%
BenchmarkGCD10x100/WithoutXY-4            256            256           +0.00%
BenchmarkGCD10x100/WithXY-4               896            432           -51.79%
BenchmarkGCD10x1000/WithoutXY-4           368            368           +0.00%
BenchmarkGCD10x1000/WithXY-4              1856           1152          -37.93%
BenchmarkGCD10x10000/WithoutXY-4          1616           1616          +0.00%
BenchmarkGCD10x10000/WithXY-4             7920           7376          -6.87%
BenchmarkGCD10x100000/WithoutXY-4         13776          13776         +0.00%
BenchmarkGCD10x100000/WithXY-4            68800          68176         -0.91%
BenchmarkGCD100x100/WithoutXY-4           208            208           +0.00%
BenchmarkGCD100x100/WithXY-4              6960           2112          -69.66%
BenchmarkGCD100x1000/WithoutXY-4          544            560           +2.94%
BenchmarkGCD100x1000/WithXY-4             7280           2400          -67.03%
BenchmarkGCD100x10000/WithoutXY-4         2896           2912          +0.55%
BenchmarkGCD100x10000/WithXY-4            15280          10002         -34.54%
BenchmarkGCD100x100000/WithoutXY-4        27344          27365         +0.08%
BenchmarkGCD100x100000/WithXY-4           88288          83427         -5.51%
BenchmarkGCD1000x1000/WithoutXY-4         544            544           +0.00%
BenchmarkGCD1000x1000/WithXY-4            178288         40043         -77.54%
BenchmarkGCD1000x10000/WithoutXY-4        3344           3136          -6.22%
BenchmarkGCD1000x10000/WithXY-4           188720         54432         -71.16%
BenchmarkGCD1000x100000/WithoutXY-4       27792          27592         -0.72%
BenchmarkGCD1000x100000/WithXY-4          373872         239447        -35.95%
BenchmarkGCD10000x10000/WithoutXY-4       4288           4288          +0.00%
BenchmarkGCD10000x10000/WithXY-4          11935584       481875        -95.96%
BenchmarkGCD10000x100000/WithoutXY-4      31296          28834         -7.87%
BenchmarkGCD10000x100000/WithXY-4         13237088       1662620       -87.44%
BenchmarkGCD100000x100000/WithoutXY-4     40768          40768         +0.00%
BenchmarkGCD100000x100000/WithXY-4        1165518864     14256010      -98.78%

Change-Id: I652b3244bd074a03f3bc9a87c282330f9e5f1507
Reviewed-on: https://go-review.googlesource.com/21506Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 1e7c61d8
......@@ -20,13 +20,27 @@ func randInt(r *rand.Rand, size uint) *Int {
}
func runGCD(b *testing.B, aSize, bSize uint) {
b.Run("WithoutXY", func(b *testing.B) {
runGCDExt(b, aSize, bSize, false)
})
b.Run("WithXY", func(b *testing.B) {
runGCDExt(b, aSize, bSize, true)
})
}
func runGCDExt(b *testing.B, aSize, bSize uint, calcXY bool) {
b.StopTimer()
var r = rand.New(rand.NewSource(1234))
aa := randInt(r, aSize)
bb := randInt(r, bSize)
var x, y *Int
if calcXY {
x = new(Int)
y = new(Int)
}
b.StartTimer()
for i := 0; i < b.N; i++ {
new(Int).GCD(nil, nil, aa, bb)
new(Int).GCD(x, y, aa, bb)
}
}
......
......@@ -459,11 +459,11 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
q := new(Int)
temp := new(Int)
r := new(Int)
for len(B.abs) > 0 {
r := new(Int)
q, r = q.QuoRem(A, B, r)
A, B = B, r
A, B, r = B, r, A
temp.Set(X)
X.Mul(X, q)
......
......@@ -8,7 +8,10 @@
package big
import "math/rand"
import (
"math/rand"
"sync"
)
// An unsigned integer x of the form
//
......@@ -539,6 +542,21 @@ func (z nat) div(z2, u, v nat) (q, r nat) {
return
}
// getNat returns a nat of len n. The contents may not be zero.
func getNat(n int) nat {
var z nat
if v := natPool.Get(); v != nil {
z = v.(nat)
}
return z.make(n)
}
func putNat(x nat) {
natPool.Put(x)
}
var natPool sync.Pool
// q = (uIn-r)/v, with 0 <= r < y
// Uses z as storage for q, and u as storage for r if possible.
// See Knuth, Volume 2, section 4.3.1, Algorithm D.
......@@ -557,7 +575,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
}
q = z.make(m + 1)
qhatv := make(nat, n+1)
qhatv := getNat(n + 1)
if alias(u, uIn) || alias(u, v) {
u = nil // u is an alias for uIn or v - cannot reuse
}
......@@ -565,10 +583,11 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
u.clear() // TODO(gri) no need to clear if we allocated a new u
// D1.
var v1 nat
shift := nlz(v[n-1])
if shift > 0 {
// do not modify v, it may be used by another goroutine simultaneously
v1 := make(nat, n)
v1 = getNat(n)
shlVU(v1, v, shift)
v = v1
}
......@@ -609,6 +628,10 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
q[j] = qhat
}
if v1 != nil {
putNat(v1)
}
putNat(qhatv)
q = q.norm()
shrVU(u, u, shift)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment