Commit f8892fb3 authored by Carl Mastrangelo's avatar Carl Mastrangelo Committed by Dave Cheney

crypto/sha1: Make sha-1 do block mixup in place

Benchmarks:

benchmark              old ns/op    new ns/op    delta
BenchmarkHash8Bytes          762          674  -11.55%
BenchmarkHash1K             8791         7375  -16.11%
BenchmarkHash8K            65094        54881  -15.69%

benchmark               old MB/s     new MB/s  speedup
BenchmarkHash8Bytes        10.50        11.86    1.13x
BenchmarkHash1K           116.48       138.84    1.19x
BenchmarkHash8K           125.85       149.27    1.19x

R=dave, rsc, iant
CC=golang-dev
https://golang.org/cl/6820096
parent 451b91b4
...@@ -16,7 +16,7 @@ const ( ...@@ -16,7 +16,7 @@ const (
) )
func block(dig *digest, p []byte) { func block(dig *digest, p []byte) {
var w [80]uint32 var w [16]uint32
h0, h1, h2, h3, h4 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] h0, h1, h2, h3, h4 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4]
for len(p) >= chunk { for len(p) >= chunk {
...@@ -26,42 +26,56 @@ func block(dig *digest, p []byte) { ...@@ -26,42 +26,56 @@ func block(dig *digest, p []byte) {
j := i * 4 j := i * 4
w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3])
} }
for i := 16; i < 80; i++ {
tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]
w[i] = tmp<<1 | tmp>>(32-1)
}
a, b, c, d, e := h0, h1, h2, h3, h4 a, b, c, d, e := h0, h1, h2, h3, h4
// Each of the four 20-iteration rounds // Each of the four 20-iteration rounds
// differs only in the computation of f and // differs only in the computation of f and
// the choice of K (_K0, _K1, etc). // the choice of K (_K0, _K1, etc).
for i := 0; i < 20; i++ { i := 0
for ; i < 16; i++ {
f := b&c | (^b)&d
a5 := a<<5 | a>>(32-5)
b30 := b<<30 | b>>(32-30)
t := a5 + f + e + w[i&0xf] + _K0
a, b, c, d, e = t, a, b30, c, d
}
for ; i < 20; i++ {
tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
w[i&0xf] = tmp<<1 | tmp>>(32-1)
f := b&c | (^b)&d f := b&c | (^b)&d
a5 := a<<5 | a>>(32-5) a5 := a<<5 | a>>(32-5)
b30 := b<<30 | b>>(32-30) b30 := b<<30 | b>>(32-30)
t := a5 + f + e + w[i] + _K0 t := a5 + f + e + w[i&0xf] + _K0
a, b, c, d, e = t, a, b30, c, d a, b, c, d, e = t, a, b30, c, d
} }
for i := 20; i < 40; i++ { for ; i < 40; i++ {
tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
w[i&0xf] = tmp<<1 | tmp>>(32-1)
f := b ^ c ^ d f := b ^ c ^ d
a5 := a<<5 | a>>(32-5) a5 := a<<5 | a>>(32-5)
b30 := b<<30 | b>>(32-30) b30 := b<<30 | b>>(32-30)
t := a5 + f + e + w[i] + _K1 t := a5 + f + e + w[i&0xf] + _K1
a, b, c, d, e = t, a, b30, c, d a, b, c, d, e = t, a, b30, c, d
} }
for i := 40; i < 60; i++ { for ; i < 60; i++ {
f := b&c | b&d | c&d tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
w[i&0xf] = tmp<<1 | tmp>>(32-1)
f := ((b | c) & d) | (b & c)
a5 := a<<5 | a>>(32-5) a5 := a<<5 | a>>(32-5)
b30 := b<<30 | b>>(32-30) b30 := b<<30 | b>>(32-30)
t := a5 + f + e + w[i] + _K2 t := a5 + f + e + w[i&0xf] + _K2
a, b, c, d, e = t, a, b30, c, d a, b, c, d, e = t, a, b30, c, d
} }
for i := 60; i < 80; i++ { for ; i < 80; i++ {
tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
w[i&0xf] = tmp<<1 | tmp>>(32-1)
f := b ^ c ^ d f := b ^ c ^ d
a5 := a<<5 | a>>(32-5) a5 := a<<5 | a>>(32-5)
b30 := b<<30 | b>>(32-30) b30 := b<<30 | b>>(32-30)
t := a5 + f + e + w[i] + _K3 t := a5 + f + e + w[i&0xf] + _K3
a, b, c, d, e = t, a, b30, c, d a, b, c, d, e = t, a, b30, c, d
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment