Commit 31c96fc2 authored by Josselin Costanzi's avatar Josselin Costanzi Committed by Brad Fitzpatrick

encoding/base64: Optimize DecodeString

Optimize DecodeString for the common case where most of the input isn't
a newline or a padding character.
Also add some testcases found when fuzzing this implementation against
upstream.
Change Decode benchmark to run with different input sizes.

name                 old time/op    new time/op    delta
DecodeString/2-4       71.5ns ± 4%    70.0ns ± 6%     ~     (p=0.246 n=5+5)
DecodeString/4-4        112ns ±25%      91ns ± 2%     ~     (p=0.056 n=5+5)
DecodeString/8-4        136ns ± 5%     126ns ± 5%   -7.33%  (p=0.016 n=5+5)
DecodeString/64-4       872ns ±29%     652ns ±21%  -25.23%  (p=0.032 n=5+5)
DecodeString/8192-4    90.9µs ±21%    61.0µs ±13%  -32.87%  (p=0.008 n=5+5)

name                 old speed      new speed      delta
DecodeString/2-4     56.0MB/s ± 4%  57.2MB/s ± 6%     ~     (p=0.310 n=5+5)
DecodeString/4-4     73.4MB/s ±23%  87.7MB/s ± 2%     ~     (p=0.056 n=5+5)
DecodeString/8-4     87.8MB/s ± 5%  94.8MB/s ± 5%   +7.98%  (p=0.016 n=5+5)
DecodeString/64-4     103MB/s ±24%   136MB/s ±19%  +32.63%  (p=0.032 n=5+5)
DecodeString/8192-4   122MB/s ±19%   180MB/s ±11%  +47.75%  (p=0.008 n=5+5)

Improves #19636

Change-Id: I39667f4fb682a12b3137946d017ad999553c5780
Reviewed-on: https://go-review.googlesource.com/34950Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent d9b1f9e8
...@@ -273,44 +273,50 @@ func (e CorruptInputError) Error() string { ...@@ -273,44 +273,50 @@ func (e CorruptInputError) Error() string {
// indicates if end-of-message padding or a partial quantum was encountered // indicates if end-of-message padding or a partial quantum was encountered
// and thus any additional data is an error. // and thus any additional data is an error.
func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
var inIdx int
si := 0 si := 0
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
for si < len(src) && !end { for si < len(src) && !end {
// Decode quantum using the base64 alphabet // Decode quantum using the base64 alphabet
var dbuf [4]byte var dbuf [4]byte
dinc, dlen := 3, 4 dinc, dlen := 3, 4
for j := range dbuf { for j := 0; j < len(dbuf); j++ {
if len(src) == si { if len(src) == si {
if enc.padChar != NoPadding || j < 2 { switch {
case j == 0:
return n, false, nil
case j == 1, enc.padChar != NoPadding:
return n, false, CorruptInputError(si - j) return n, false, CorruptInputError(si - j)
} }
dinc, dlen, end = j-1, j, true dinc, dlen, end = j-1, j, true
break break
} }
in := src[si] in := src[si]
inIdx = si
si++ si++
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') { out := enc.decodeMap[in]
si++ if out != 0xFF {
dbuf[j] = out
continue
} }
if in == '\n' || in == '\r' {
j--
continue
}
if rune(in) == enc.padChar { if rune(in) == enc.padChar {
// We've reached the end and there's padding // We've reached the end and there's padding
switch j { switch j {
case 0, 1: case 0, 1:
// incorrect padding // incorrect padding
return n, false, CorruptInputError(inIdx) return n, false, CorruptInputError(si - 1)
case 2: case 2:
// "==" is expected, the first "=" is already consumed. // "==" is expected, the first "=" is already consumed.
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
if si == len(src) { if si == len(src) {
// not enough padding // not enough padding
return n, false, CorruptInputError(len(src)) return n, false, CorruptInputError(len(src))
...@@ -321,10 +327,10 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { ...@@ -321,10 +327,10 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
} }
si++ si++
// skip over newlines }
for si < len(src) && (src[si] == '\n' || src[si] == '\r') { // skip over newlines
si++ for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
} si++
} }
if si < len(src) { if si < len(src) {
// trailing garbage // trailing garbage
...@@ -333,10 +339,7 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { ...@@ -333,10 +339,7 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
dinc, dlen, end = 3, j, true dinc, dlen, end = 3, j, true
break break
} }
dbuf[j] = enc.decodeMap[in] return n, false, CorruptInputError(si - 1)
if dbuf[j] == 0xFF {
return n, false, CorruptInputError(inIdx)
}
} }
// Convert 4x 6bit source bytes into 3 bytes // Convert 4x 6bit source bytes into 3 bytes
......
...@@ -7,6 +7,7 @@ package base64 ...@@ -7,6 +7,7 @@ package base64
import ( import (
"bytes" "bytes"
"errors" "errors"
"fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"reflect" "reflect"
...@@ -202,6 +203,9 @@ func TestDecodeCorrupt(t *testing.T) { ...@@ -202,6 +203,9 @@ func TestDecodeCorrupt(t *testing.T) {
offset int // -1 means no corruption. offset int // -1 means no corruption.
}{ }{
{"", -1}, {"", -1},
{"\n", -1},
{"AAA=\n", -1},
{"AAAA\n", -1},
{"!!!!", 0}, {"!!!!", 0},
{"====", 0}, {"====", 0},
{"x===", 1}, {"x===", 1},
...@@ -468,10 +472,19 @@ func BenchmarkEncodeToString(b *testing.B) { ...@@ -468,10 +472,19 @@ func BenchmarkEncodeToString(b *testing.B) {
} }
func BenchmarkDecodeString(b *testing.B) { func BenchmarkDecodeString(b *testing.B) {
data := StdEncoding.EncodeToString(make([]byte, 8192)) sizes := []int{2, 4, 8, 64, 8192}
b.SetBytes(int64(len(data))) benchFunc := func(b *testing.B, benchSize int) {
for i := 0; i < b.N; i++ { data := StdEncoding.EncodeToString(make([]byte, benchSize))
StdEncoding.DecodeString(data) b.SetBytes(int64(len(data)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
StdEncoding.DecodeString(data)
}
}
for _, size := range sizes {
b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
benchFunc(b, size)
})
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment