Commit f0fcb2d5 authored by Robert Griesemer's avatar Robert Griesemer

Symmetric changes to md4.go as for md5.go.

Use uint index variables in some cases instead
of int to enable strength reduction; this makes
it possible for the compiler to reduce % into
masks.

Old code: 6g -S md4.go md4block.go | grep "md4block.go:44"
0471 (md4block.go:44) MOVL    AX,BX
0472 (md4block.go:44) MOVL    AX,BP
0473 (md4block.go:44) MOVL    AX,R8
0474 (md4block.go:44) SARL    $31,R8
0475 (md4block.go:44) SHRL    $30,R8
0476 (md4block.go:44) ADDL    R8,BP
0477 (md4block.go:44) SARL    $2,BP
0478 (md4block.go:44) IMULL   $4,BP
0479 (md4block.go:44) SUBL    BP,BX
0480 (md4block.go:44) MOVLQSX BX,BX
0481 (md4block.go:44) LEAQ    shift1+0(SB),BP
0482 (md4block.go:44) CMPL    BX,8(BP)
0483 (md4block.go:44) JCS     ,485
0484 (md4block.go:44) CALL    ,runtime.throwindex+0(SB)
0485 (md4block.go:44) MOVQ    (BP),BP
0486 (md4block.go:44) MOVL    (BP)(BX*4),DI

New code: 6g -S md4.go md4block.go | grep "md4block.go:44"
0471 (md4block.go:44) MOVL    AX,BX
0472 (md4block.go:44) ANDL    $3,BX
0473 (md4block.go:44) MOVLQZX BX,BX
0474 (md4block.go:44) LEAQ    shift1+0(SB),BP
0475 (md4block.go:44) CMPL    BX,8(BP)
0476 (md4block.go:44) JCS     ,478
0477 (md4block.go:44) CALL    ,runtime.throwindex+0(SB)
0478 (md4block.go:44) MOVQ    (BP),BP
0479 (md4block.go:44) MOVL    (BP)(BX*4),DI

R=agl, agl1
CC=golang-dev
https://golang.org/cl/181086
parent 9d07d37f
...@@ -68,8 +68,8 @@ func (d *digest) Write(p []byte) (nn int, err os.Error) { ...@@ -68,8 +68,8 @@ func (d *digest) Write(p []byte) (nn int, err os.Error) {
n := _Block(d, p) n := _Block(d, p)
p = p[n:] p = p[n:]
if len(p) > 0 { if len(p) > 0 {
for i := 0; i < len(p); i++ { for i, x := range p {
d.x[i] = p[i] d.x[i] = x
} }
d.nx = len(p) d.nx = len(p)
} }
...@@ -100,16 +100,12 @@ func (d *digest) Sum() []byte { ...@@ -100,16 +100,12 @@ func (d *digest) Sum() []byte {
p := make([]byte, 16) p := make([]byte, 16)
j := 0 j := 0
for i := 0; i < 4; i++ { for _, s := range d.s {
s := d.s[i] p[j+0] = byte(s >> 0)
p[j] = byte(s) p[j+1] = byte(s >> 8)
j++ p[j+2] = byte(s >> 16)
p[j] = byte(s >> 8) p[j+3] = byte(s >> 24)
j++ j += 4
p[j] = byte(s >> 16)
j++
p[j] = byte(s >> 24)
j++
} }
return p return p
} }
...@@ -25,9 +25,10 @@ func _Block(dig *digest, p []byte) int { ...@@ -25,9 +25,10 @@ func _Block(dig *digest, p []byte) int {
for len(p) >= _Chunk { for len(p) >= _Chunk {
aa, bb, cc, dd := a, b, c, d aa, bb, cc, dd := a, b, c, d
j := 0
for i := 0; i < 16; i++ { for i := 0; i < 16; i++ {
j := i * 4
X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24 X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
j += 4
} }
// If this needs to be made faster in the future, // If this needs to be made faster in the future,
...@@ -37,9 +38,12 @@ func _Block(dig *digest, p []byte) int { ...@@ -37,9 +38,12 @@ func _Block(dig *digest, p []byte) int {
// with suitable variable renaming in each // with suitable variable renaming in each
// unrolled body, delete the a, b, c, d = d, a, b, c // unrolled body, delete the a, b, c, d = d, a, b, c
// (or you can let the optimizer do the renaming). // (or you can let the optimizer do the renaming).
//
// The index variables are uint so that % by a power
// of two can be optimized easily by a compiler.
// Round 1. // Round 1.
for i := 0; i < 16; i++ { for i := uint(0); i < 16; i++ {
x := i x := i
s := shift1[i%4] s := shift1[i%4]
f := ((c ^ d) & b) ^ d f := ((c ^ d) & b) ^ d
...@@ -49,7 +53,7 @@ func _Block(dig *digest, p []byte) int { ...@@ -49,7 +53,7 @@ func _Block(dig *digest, p []byte) int {
} }
// Round 2. // Round 2.
for i := 0; i < 16; i++ { for i := uint(0); i < 16; i++ {
x := xIndex2[i] x := xIndex2[i]
s := shift2[i%4] s := shift2[i%4]
g := (b & c) | (b & d) | (c & d) g := (b & c) | (b & d) | (c & d)
...@@ -59,7 +63,7 @@ func _Block(dig *digest, p []byte) int { ...@@ -59,7 +63,7 @@ func _Block(dig *digest, p []byte) int {
} }
// Round 3. // Round 3.
for i := 0; i < 16; i++ { for i := uint(0); i < 16; i++ {
x := xIndex3[i] x := xIndex3[i]
s := shift3[i%4] s := shift3[i%4]
h := b ^ c ^ d h := b ^ c ^ d
......
...@@ -98,9 +98,10 @@ func _Block(dig *digest, p []byte) int { ...@@ -98,9 +98,10 @@ func _Block(dig *digest, p []byte) int {
for len(p) >= _Chunk { for len(p) >= _Chunk {
aa, bb, cc, dd := a, b, c, d aa, bb, cc, dd := a, b, c, d
j := 0
for i := 0; i < 16; i++ { for i := 0; i < 16; i++ {
j := i * 4
X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24 X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
j += 4
} }
// If this needs to be made faster in the future, // If this needs to be made faster in the future,
...@@ -110,52 +111,47 @@ func _Block(dig *digest, p []byte) int { ...@@ -110,52 +111,47 @@ func _Block(dig *digest, p []byte) int {
// with suitable variable renaming in each // with suitable variable renaming in each
// unrolled body, delete the a, b, c, d = d, a, b, c // unrolled body, delete the a, b, c, d = d, a, b, c
// (or you can let the optimizer do the renaming). // (or you can let the optimizer do the renaming).
//
// The index variables are uint so that % by a power
// of two can be optimized easily by a compiler.
// Round 1. // Round 1.
for i := 0; i < 16; i++ { for i := uint(0); i < 16; i++ {
x := i x := i
t := i
s := shift1[i%4] s := shift1[i%4]
f := ((c ^ d) & b) ^ d f := ((c ^ d) & b) ^ d
a += f + X[x] + table[t] a += f + X[x] + table[i]
a = a<<s | a>>(32-s) a = a<<s | a>>(32-s) + b
a += b
a, b, c, d = d, a, b, c a, b, c, d = d, a, b, c
} }
// Round 2. // Round 2.
for i := 0; i < 16; i++ { for i := uint(0); i < 16; i++ {
x := (1 + 5*i) % 16 x := (1 + 5*i) % 16
t := 16 + i
s := shift2[i%4] s := shift2[i%4]
g := ((b ^ c) & d) ^ c g := ((b ^ c) & d) ^ c
a += g + X[x] + table[t] a += g + X[x] + table[i+16]
a = a<<s | a>>(32-s) a = a<<s | a>>(32-s) + b
a += b
a, b, c, d = d, a, b, c a, b, c, d = d, a, b, c
} }
// Round 3. // Round 3.
for i := 0; i < 16; i++ { for i := uint(0); i < 16; i++ {
x := (5 + 3*i) % 16 x := (5 + 3*i) % 16
t := 32 + i
s := shift3[i%4] s := shift3[i%4]
h := b ^ c ^ d h := b ^ c ^ d
a += h + X[x] + table[t] a += h + X[x] + table[i+32]
a = a<<s | a>>(32-s) a = a<<s | a>>(32-s) + b
a += b
a, b, c, d = d, a, b, c a, b, c, d = d, a, b, c
} }
// Round 4. // Round 4.
for i := 0; i < 16; i++ { for i := uint(0); i < 16; i++ {
x := (7 * i) % 16 x := (7 * i) % 16
s := shift4[i%4] s := shift4[i%4]
t := 48 + i j := c ^ (b | ^d)
ii := c ^ (b | ^d) a += j + X[x] + table[i+48]
a += ii + X[x] + table[t] a = a<<s | a>>(32-s) + b
a = a<<s | a>>(32-s)
a += b
a, b, c, d = d, a, b, c a, b, c, d = d, a, b, c
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment