Commit 1643d4f3 authored by Brian Kessler's avatar Brian Kessler Committed by Robert Griesemer

math/big: implement Lehmer's GCD algorithm

Updates #15833

Lehmer's GCD algorithm uses single precision calculations
to simulate several steps of multiple precision calculations
in Euclid's GCD algorithm which leads to a considerable
speed up.  This implementation uses Collins' simplified
testing condition on the single digit cosequences which
requires only one quotient and avoids any possibility of
overflow.

name                          old time/op  new time/op  delta
GCD10x10/WithoutXY-4          1.82µs ±24%  0.28µs ± 6%  -84.40%  (p=0.008 n=5+5)
GCD10x10/WithXY-4             1.69µs ± 6%  1.71µs ± 6%     ~     (p=0.595 n=5+5)
GCD10x100/WithoutXY-4         1.87µs ± 2%  0.56µs ± 4%  -70.13%  (p=0.008 n=5+5)
GCD10x100/WithXY-4            2.61µs ± 2%  2.65µs ± 4%     ~     (p=0.635 n=5+5)
GCD10x1000/WithoutXY-4        2.75µs ± 2%  1.48µs ± 1%  -46.06%  (p=0.008 n=5+5)
GCD10x1000/WithXY-4           5.29µs ± 2%  5.25µs ± 2%     ~     (p=0.548 n=5+5)
GCD10x10000/WithoutXY-4       10.7µs ± 2%  10.3µs ± 0%   -4.38%  (p=0.008 n=5+5)
GCD10x10000/WithXY-4          22.3µs ± 6%  22.1µs ± 1%     ~     (p=1.000 n=5+5)
GCD10x100000/WithoutXY-4      93.7µs ± 2%  99.4µs ± 2%   +6.09%  (p=0.008 n=5+5)
GCD10x100000/WithXY-4          196µs ± 2%   199µs ± 2%     ~     (p=0.222 n=5+5)
GCD100x100/WithoutXY-4        10.1µs ± 2%   2.5µs ± 2%  -74.84%  (p=0.008 n=5+5)
GCD100x100/WithXY-4           21.4µs ± 2%  21.3µs ± 7%     ~     (p=0.548 n=5+5)
GCD100x1000/WithoutXY-4       11.3µs ± 2%   4.4µs ± 4%  -60.87%  (p=0.008 n=5+5)
GCD100x1000/WithXY-4          24.7µs ± 3%  23.9µs ± 1%     ~     (p=0.056 n=5+5)
GCD100x10000/WithoutXY-4      26.6µs ± 1%  20.0µs ± 2%  -24.82%  (p=0.008 n=5+5)
GCD100x10000/WithXY-4         78.7µs ± 2%  78.2µs ± 2%     ~     (p=0.690 n=5+5)
GCD100x100000/WithoutXY-4      174µs ± 2%   171µs ± 1%     ~     (p=0.056 n=5+5)
GCD100x100000/WithXY-4         563µs ± 4%   561µs ± 2%     ~     (p=1.000 n=5+5)
GCD1000x1000/WithoutXY-4       120µs ± 5%    29µs ± 3%  -75.71%  (p=0.008 n=5+5)
GCD1000x1000/WithXY-4          355µs ± 4%   358µs ± 2%     ~     (p=0.841 n=5+5)
GCD1000x10000/WithoutXY-4      140µs ± 2%    49µs ± 2%  -65.07%  (p=0.008 n=5+5)
GCD1000x10000/WithXY-4         626µs ± 3%   628µs ± 9%     ~     (p=0.690 n=5+5)
GCD1000x100000/WithoutXY-4     340µs ± 4%   259µs ± 6%  -23.79%  (p=0.008 n=5+5)
GCD1000x100000/WithXY-4       3.76ms ± 4%  3.82ms ± 5%     ~     (p=0.310 n=5+5)
GCD10000x10000/WithoutXY-4    3.11ms ± 3%  0.54ms ± 2%  -82.74%  (p=0.008 n=5+5)
GCD10000x10000/WithXY-4       7.96ms ± 3%  7.69ms ± 3%     ~     (p=0.151 n=5+5)
GCD10000x100000/WithoutXY-4   3.88ms ± 1%  1.27ms ± 2%  -67.21%  (p=0.008 n=5+5)
GCD10000x100000/WithXY-4      38.1ms ± 2%  38.8ms ± 1%     ~     (p=0.095 n=5+5)
GCD100000x100000/WithoutXY-4   208ms ± 1%    25ms ± 4%  -88.07%  (p=0.008 n=5+5)
GCD100000x100000/WithXY-4      533ms ± 5%   525ms ± 4%     ~     (p=0.548 n=5+5)

Change-Id: Ic1e007eb807b93e75f4752e968e98c1f0cb90e43
Reviewed-on: https://go-review.googlesource.com/59450
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarRobert Griesemer <gri@golang.org>
parent a5c44f3e
...@@ -476,7 +476,7 @@ func (z *Int) GCD(x, y, a, b *Int) *Int { ...@@ -476,7 +476,7 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
return z return z
} }
if x == nil && y == nil { if x == nil && y == nil {
return z.binaryGCD(a, b) return z.lehmerGCD(a, b)
} }
A := new(Int).Set(a) A := new(Int).Set(a)
...@@ -515,64 +515,122 @@ func (z *Int) GCD(x, y, a, b *Int) *Int { ...@@ -515,64 +515,122 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
return z return z
} }
// binaryGCD sets z to the greatest common divisor of a and b, which both must // lehmerGCD sets z to the greatest common divisor of a and b,
// be > 0, and returns z. // which both must be > 0, and returns z.
// See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm B. // See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm L.
func (z *Int) binaryGCD(a, b *Int) *Int { // This implementation uses the improved condition by Collins requiring only one
u := z // quotient and avoiding the possibility of single Word overflow.
v := new(Int) // See Jebelean, "Improving the multiprecision Euclidean algorithm",
// Design and Implementation of Symbolic Computation Systems, pp 45-58.
// use one Euclidean iteration to ensure that u and v are approx. the same size func (z *Int) lehmerGCD(a, b *Int) *Int {
switch {
case len(a.abs) > len(b.abs):
// must set v before u since u may be alias for a or b (was issue #11284)
v.Rem(a, b)
u.Set(b)
case len(a.abs) < len(b.abs):
v.Rem(b, a)
u.Set(a)
default:
v.Set(b)
u.Set(a)
}
// a, b must not be used anymore (may be aliases with u)
// v might be 0 now // ensure a >= b
if len(v.abs) == 0 { if a.abs.cmp(b.abs) < 0 {
return u a, b = b, a
} }
// u > 0 && v > 0
// determine largest k such that u = u' << k, v = v' << k // don't destroy incoming values of a and b
k := u.abs.trailingZeroBits() B := new(Int).Set(b) // must be set first in case b is an alias of z
if vk := v.abs.trailingZeroBits(); vk < k { A := z.Set(a)
k = vk
}
u.Rsh(u, k)
v.Rsh(v, k)
// determine t (we know that u > 0) // temp variables for multiprecision update
t := new(Int) t := new(Int)
if u.abs[0]&1 != 0 { r := new(Int)
// u is odd s := new(Int)
t.Neg(v) w := new(Int)
} else {
t.Set(u) // loop invariant A >= B
} for len(B.abs) > 1 {
// initialize the digits
var a1, a2, u0, u1, u2, v0, v1, v2 Word
m := len(B.abs) // m >= 2
n := len(A.abs) // n >= m >= 2
// extract the top Word of bits from A and B
h := nlz(A.abs[n-1])
a1 = (A.abs[n-1] << h) | (A.abs[n-2] >> (_W - h))
// B may have implicit zero words in the high bits if the lengths differ
switch {
case n == m:
a2 = (B.abs[n-1] << h) | (B.abs[n-2] >> (_W - h))
case n == m+1:
a2 = (B.abs[n-2] >> (_W - h))
default:
a2 = 0
}
// Since we are calculating with full words to avoid overflow,
// we use 'even' to track the sign of the cosequences.
// For even iterations: u0, v1 >= 0 && u1, v0 <= 0
// For odd iterations: u0, v1 <= 0 && u1, v0 >= 0
// The first iteration starts with k=1 (odd).
even := false
// variables to track the cosequences
u0, u1, u2 = 0, 1, 0
v0, v1, v2 = 0, 0, 1
// calculate the quotient and cosequences using Collins' stopping condition
for a2 >= v2 && a1-a2 >= v1+v2 {
q := a1 / a2
a1, a2 = a2, a1-q*a2
u0, u1, u2 = u1, u2, u1+q*u2
v0, v1, v2 = v1, v2, v1+q*v2
even = !even
}
// multiprecision step
if v0 != 0 {
// simulate the effect of the single precision steps using the cosequences
// A = u0*A + v0*B
// B = u1*A + v1*B
t.abs = t.abs.setWord(u0)
s.abs = s.abs.setWord(v0)
t.neg = !even
s.neg = even
t.Mul(A, t)
s.Mul(B, s)
r.abs = r.abs.setWord(u1)
w.abs = w.abs.setWord(v1)
r.neg = even
w.neg = !even
r.Mul(A, r)
w.Mul(B, w)
A.Add(t, s)
B.Add(r, w)
for len(t.abs) > 0 {
// reduce t
t.Rsh(t, t.abs.trailingZeroBits())
if t.neg {
v, t = t, v
v.neg = len(v.abs) > 0 && !v.neg // 0 has no sign
} else { } else {
u, t = t, u // single-digit calculations failed to simluate any quotients
// do a standard Euclidean step
t.Rem(A, B)
A, B, t = B, t, A
} }
t.Sub(u, v)
} }
return z.Lsh(u, k) if len(B.abs) > 0 {
// standard Euclidean algorithm base case for B a single Word
if len(A.abs) > 1 {
// A is longer than a single Word
t.Rem(A, B)
A, B, t = B, t, A
}
if len(B.abs) > 0 {
// A and B are both a single Word
a1, a2 := A.abs[0], B.abs[0]
for a2 != 0 {
a1, a2 = a2, a1%a2
}
A.abs[0] = a1
}
}
*z = *A
return z
} }
// Rand sets z to a pseudo-random number in [0, n) and returns z. // Rand sets z to a pseudo-random number in [0, n) and returns z.
......
...@@ -675,6 +675,37 @@ func checkGcd(aBytes, bBytes []byte) bool { ...@@ -675,6 +675,37 @@ func checkGcd(aBytes, bBytes []byte) bool {
return x.Cmp(d) == 0 return x.Cmp(d) == 0
} }
// euclidGCD is a reference implementation of Euclid's GCD
// algorithm for testing against optimized algorithms.
// Requirements: a, b > 0
func euclidGCD(a, b *Int) *Int {
A := new(Int).Set(a)
B := new(Int).Set(b)
t := new(Int)
for len(B.abs) > 0 {
// A, B = B, A % B
t.Rem(A, B)
A, B, t = B, t, A
}
return A
}
func checkLehmerGcd(aBytes, bBytes []byte) bool {
a := new(Int).SetBytes(aBytes)
b := new(Int).SetBytes(bBytes)
if a.Sign() <= 0 || b.Sign() <= 0 {
return true // can only test positive arguments
}
d := new(Int).lehmerGCD(a, b)
d0 := euclidGCD(a, b)
return d.Cmp(d0) == 0
}
var gcdTests = []struct { var gcdTests = []struct {
d, x, y, a, b string d, x, y, a, b string
}{ }{
...@@ -708,38 +739,28 @@ func testGcd(t *testing.T, d, x, y, a, b *Int) { ...@@ -708,38 +739,28 @@ func testGcd(t *testing.T, d, x, y, a, b *Int) {
D := new(Int).GCD(X, Y, a, b) D := new(Int).GCD(X, Y, a, b)
if D.Cmp(d) != 0 { if D.Cmp(d) != 0 {
t.Errorf("GCD(%s, %s): got d = %s, want %s", a, b, D, d) t.Errorf("GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, D, d)
} }
if x != nil && X.Cmp(x) != 0 { if x != nil && X.Cmp(x) != 0 {
t.Errorf("GCD(%s, %s): got x = %s, want %s", a, b, X, x) t.Errorf("GCD(%s, %s, %s, %s): got x = %s, want %s", x, y, a, b, X, x)
} }
if y != nil && Y.Cmp(y) != 0 { if y != nil && Y.Cmp(y) != 0 {
t.Errorf("GCD(%s, %s): got y = %s, want %s", a, b, Y, y) t.Errorf("GCD(%s, %s, %s, %s): got y = %s, want %s", x, y, a, b, Y, y)
}
// binaryGCD requires a > 0 && b > 0
if a.Sign() <= 0 || b.Sign() <= 0 {
return
}
D.binaryGCD(a, b)
if D.Cmp(d) != 0 {
t.Errorf("binaryGcd(%s, %s): got d = %s, want %s", a, b, D, d)
} }
// check results in presence of aliasing (issue #11284) // check results in presence of aliasing (issue #11284)
a2 := new(Int).Set(a) a2 := new(Int).Set(a)
b2 := new(Int).Set(b) b2 := new(Int).Set(b)
a2.binaryGCD(a2, b2) // result is same as 1st argument a2.GCD(X, Y, a2, b2) // result is same as 1st argument
if a2.Cmp(d) != 0 { if a2.Cmp(d) != 0 {
t.Errorf("binaryGcd(%s, %s): got d = %s, want %s", a, b, a2, d) t.Errorf("aliased z = a GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, a2, d)
} }
a2 = new(Int).Set(a) a2 = new(Int).Set(a)
b2 = new(Int).Set(b) b2 = new(Int).Set(b)
b2.binaryGCD(a2, b2) // result is same as 2nd argument b2.GCD(X, Y, a2, b2) // result is same as 2nd argument
if b2.Cmp(d) != 0 { if b2.Cmp(d) != 0 {
t.Errorf("binaryGcd(%s, %s): got d = %s, want %s", a, b, b2, d) t.Errorf("aliased z = b GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, b2, d)
} }
} }
...@@ -760,6 +781,10 @@ func TestGcd(t *testing.T) { ...@@ -760,6 +781,10 @@ func TestGcd(t *testing.T) {
if err := quick.Check(checkGcd, nil); err != nil { if err := quick.Check(checkGcd, nil); err != nil {
t.Error(err) t.Error(err)
} }
if err := quick.Check(checkLehmerGcd, nil); err != nil {
t.Error(err)
}
} }
type intShiftTest struct { type intShiftTest struct {
......
...@@ -422,7 +422,7 @@ func (z *Rat) norm() *Rat { ...@@ -422,7 +422,7 @@ func (z *Rat) norm() *Rat {
neg := z.a.neg neg := z.a.neg
z.a.neg = false z.a.neg = false
z.b.neg = false z.b.neg = false
if f := NewInt(0).binaryGCD(&z.a, &z.b); f.Cmp(intOne) != 0 { if f := NewInt(0).lehmerGCD(&z.a, &z.b); f.Cmp(intOne) != 0 {
z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs) z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs)
z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs) z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs)
if z.b.abs.cmp(natOne) == 0 { if z.b.abs.cmp(natOne) == 0 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment