Commit 5f403517 authored by Daniel Martí's avatar Daniel Martí

encoding/base64: speed up the decoder

Most of the decoding time is spent in the first Decode loop, since the
rest of the function only deals with the few remaining bytes. Any
unnecessary work done in that loop body matters tremendously.

One such unnecessary bottleneck was the use of the enc.decodeMap table.
Since enc is a pointer receiver, and the field is used within the
non-inlineable function decode64, the decoder must perform a nil check
at every iteration.

To fix that, move the enc.decodeMap uses to the parent function, where
we can lift the nil check outside the loop. That gives roughly a 15%
speed-up. The function no longer performs decoding per se, so rename it.
While at it, remove the now unnecessary receivers.

An unfortunate side effect of this change is that the loop now contains
eight bounds checks on src instead of just one. However, not having to
slice src plus the nil check removal well outweigh the added cost.

The other piece that made decode64 slow was that it wasn't inlined, and
had multiple branches. Use a simple bitwise-or trick suggested by Roger
Peppe, and collapse the rest of the bitwise logic into a single
expression. Inlinability and the reduced branching give a further 10%
speed-up.

Finally, add these two functions to TestIntendedInlining, since we want
them to stay inlinable.

Apply the same refactor to decode32 for consistency, and to let 32-bit
architectures see a similar performance gain for large inputs.

name                 old time/op    new time/op    delta
DecodeString/2-8       47.3ns ± 1%    45.8ns ± 0%   -3.28%  (p=0.002 n=6+6)
DecodeString/4-8       55.8ns ± 2%    51.5ns ± 0%   -7.71%  (p=0.004 n=5+6)
DecodeString/8-8       64.9ns ± 0%    61.7ns ± 0%   -4.99%  (p=0.004 n=5+6)
DecodeString/64-8       238ns ± 0%     198ns ± 0%  -16.54%  (p=0.002 n=6+6)
DecodeString/8192-8    19.5µs ± 0%    14.6µs ± 0%  -24.96%  (p=0.004 n=6+5)

name                 old speed      new speed      delta
DecodeString/2-8     84.6MB/s ± 1%  87.4MB/s ± 0%   +3.38%  (p=0.002 n=6+6)
DecodeString/4-8      143MB/s ± 2%   155MB/s ± 0%   +8.41%  (p=0.004 n=5+6)
DecodeString/8-8      185MB/s ± 0%   195MB/s ± 0%   +5.29%  (p=0.004 n=5+6)
DecodeString/64-8     369MB/s ± 0%   442MB/s ± 0%  +19.78%  (p=0.002 n=6+6)
DecodeString/8192-8   560MB/s ± 0%   746MB/s ± 0%  +33.27%  (p=0.004 n=6+5)

Updates #19636.

Change-Id: Ib839577b0e3f5a2bb201f5cae580c61365d92894
Reviewed-on: https://go-review.googlesource.com/c/go/+/151177
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: default avatarroger peppe <rogpeppe@gmail.com>
parent a5fdd58c
...@@ -119,6 +119,10 @@ func TestIntendedInlining(t *testing.T) { ...@@ -119,6 +119,10 @@ func TestIntendedInlining(t *testing.T) {
"byLiteral.Less", "byLiteral.Less",
"byLiteral.Swap", "byLiteral.Swap",
}, },
"encoding/base64": {
"assemble32",
"assemble64",
},
"unicode/utf8": { "unicode/utf8": {
"FullRune", "FullRune",
"FullRuneInString", "FullRuneInString",
......
...@@ -284,6 +284,9 @@ func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err err ...@@ -284,6 +284,9 @@ func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err err
var dbuf [4]byte var dbuf [4]byte
dlen := 4 dlen := 4
// Lift the nil check outside of the loop.
_ = enc.decodeMap
for j := 0; j < len(dbuf); j++ { for j := 0; j < len(dbuf); j++ {
if len(src) == si { if len(src) == si {
switch { switch {
...@@ -467,9 +470,23 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { ...@@ -467,9 +470,23 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
return 0, nil return 0, nil
} }
// Lift the nil check outside of the loop. enc.decodeMap is directly
// used later in this function, to let the compiler know that the
// receiver can't be nil.
_ = enc.decodeMap
si := 0 si := 0
for strconv.IntSize >= 64 && len(src)-si >= 8 && len(dst)-n >= 8 { for strconv.IntSize >= 64 && len(src)-si >= 8 && len(dst)-n >= 8 {
if dn, ok := enc.decode64(src[si:]); ok { if dn, ok := assemble64(
enc.decodeMap[src[si+0]],
enc.decodeMap[src[si+1]],
enc.decodeMap[src[si+2]],
enc.decodeMap[src[si+3]],
enc.decodeMap[src[si+4]],
enc.decodeMap[src[si+5]],
enc.decodeMap[src[si+6]],
enc.decodeMap[src[si+7]],
); ok {
binary.BigEndian.PutUint64(dst[n:], dn) binary.BigEndian.PutUint64(dst[n:], dn)
n += 6 n += 6
si += 8 si += 8
...@@ -484,7 +501,12 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { ...@@ -484,7 +501,12 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
} }
for len(src)-si >= 4 && len(dst)-n >= 4 { for len(src)-si >= 4 && len(dst)-n >= 4 {
if dn, ok := enc.decode32(src[si:]); ok { if dn, ok := assemble32(
enc.decodeMap[src[si+0]],
enc.decodeMap[src[si+1]],
enc.decodeMap[src[si+2]],
enc.decodeMap[src[si+3]],
); ok {
binary.BigEndian.PutUint32(dst[n:], dn) binary.BigEndian.PutUint32(dst[n:], dn)
n += 3 n += 3
si += 4 si += 4
...@@ -509,70 +531,40 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { ...@@ -509,70 +531,40 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
return n, err return n, err
} }
// decode32 tries to decode 4 base64 characters into 3 bytes, and returns those // assemble32 assembles 4 base64 digits into 3 bytes.
// bytes. len(src) must be >= 4. // Each digit comes from the decode map, and will be 0xff
// Returns (0, false) if decoding failed. // if it came from an invalid character.
func (enc *Encoding) decode32(src []byte) (dn uint32, ok bool) { func assemble32(n1, n2, n3, n4 byte) (dn uint32, ok bool) {
var n uint32 // Check that all the digits are valid. If any of them was 0xff, their
_ = src[3] // bitwise OR will be 0xff.
if n = uint32(enc.decodeMap[src[0]]); n == 0xff { if n1|n2|n3|n4 == 0xff {
return 0, false
}
dn |= n << 26
if n = uint32(enc.decodeMap[src[1]]); n == 0xff {
return 0, false
}
dn |= n << 20
if n = uint32(enc.decodeMap[src[2]]); n == 0xff {
return 0, false return 0, false
} }
dn |= n << 14 return uint32(n1)<<26 |
if n = uint32(enc.decodeMap[src[3]]); n == 0xff { uint32(n2)<<20 |
return 0, false uint32(n3)<<14 |
} uint32(n4)<<8,
dn |= n << 8 true
return dn, true
} }
// decode64 tries to decode 8 base64 characters into 6 bytes, and returns those // assemble64 assembles 8 base64 digits into 6 bytes.
// bytes. len(src) must be >= 8. // Each digit comes from the decode map, and will be 0xff
// Returns (0, false) if decoding failed. // if it came from an invalid character.
func (enc *Encoding) decode64(src []byte) (dn uint64, ok bool) { func assemble64(n1, n2, n3, n4, n5, n6, n7, n8 byte) (dn uint64, ok bool) {
var n uint64 // Check that all the digits are valid. If any of them was 0xff, their
_ = src[7] // bitwise OR will be 0xff.
if n = uint64(enc.decodeMap[src[0]]); n == 0xff { if n1|n2|n3|n4|n5|n6|n7|n8 == 0xff {
return 0, false
}
dn |= n << 58
if n = uint64(enc.decodeMap[src[1]]); n == 0xff {
return 0, false
}
dn |= n << 52
if n = uint64(enc.decodeMap[src[2]]); n == 0xff {
return 0, false
}
dn |= n << 46
if n = uint64(enc.decodeMap[src[3]]); n == 0xff {
return 0, false
}
dn |= n << 40
if n = uint64(enc.decodeMap[src[4]]); n == 0xff {
return 0, false
}
dn |= n << 34
if n = uint64(enc.decodeMap[src[5]]); n == 0xff {
return 0, false
}
dn |= n << 28
if n = uint64(enc.decodeMap[src[6]]); n == 0xff {
return 0, false
}
dn |= n << 22
if n = uint64(enc.decodeMap[src[7]]); n == 0xff {
return 0, false return 0, false
} }
dn |= n << 16 return uint64(n1)<<58 |
return dn, true uint64(n2)<<52 |
uint64(n3)<<46 |
uint64(n4)<<40 |
uint64(n5)<<34 |
uint64(n6)<<28 |
uint64(n7)<<22 |
uint64(n8)<<16,
true
} }
type newlineFilteringReader struct { type newlineFilteringReader struct {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment