Commit 626da8d7 authored by Brad Fitzpatrick's avatar Brad Fitzpatrick

encoding/json: speed up decoding

Don't make copies of keys while decoding, and don't use the
expensive strings.EqualFold when it's not necessary. Instead,
note in the existing field cache what algorithm to use to
check fold equality... most keys are just ASCII letters.

benchmark               old ns/op    new ns/op    delta
BenchmarkCodeDecoder    137074314    103974418  -24.15%

benchmark                old MB/s     new MB/s  speedup
BenchmarkCodeDecoder        14.16        18.66    1.32x

Update #6496

R=golang-dev, rsc, adg, r, mikioh.mikioh
CC=golang-dev
https://golang.org/cl/13894045
parent 5334b73d
......@@ -8,6 +8,7 @@
package json
import (
"bytes"
"encoding"
"encoding/base64"
"errors"
......@@ -15,7 +16,6 @@ import (
"reflect"
"runtime"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
......@@ -500,11 +500,11 @@ func (d *decodeState) object(v reflect.Value) {
d.error(errPhase)
}
// Read string key.
// Read key.
start := d.off - 1
op = d.scanWhile(scanContinue)
item := d.data[start : d.off-1]
key, ok := unquote(item)
key, ok := unquoteBytes(item)
if !ok {
d.error(errPhase)
}
......@@ -526,11 +526,11 @@ func (d *decodeState) object(v reflect.Value) {
fields := cachedTypeFields(v.Type())
for i := range fields {
ff := &fields[i]
if ff.name == key {
if bytes.Equal(ff.nameBytes, key) {
f = ff
break
}
if f == nil && strings.EqualFold(ff.name, key) {
if f == nil && ff.equalFold(ff.nameBytes, key) {
f = ff
}
}
......
......@@ -936,6 +936,9 @@ func (e *encodeState) stringBytes(s []byte) (int, error) {
// A field represents a single field found in a struct.
type field struct {
name string
nameBytes []byte // []byte(name)
equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
tag bool
index []int
typ reflect.Type
......@@ -943,6 +946,12 @@ type field struct {
quoted bool
}
func fillField(f field) field {
f.nameBytes = []byte(f.name)
f.equalFold = foldFunc(f.nameBytes)
return f
}
// byName sorts field by name, breaking ties with depth,
// then breaking ties with "name came from json tag", then
// breaking ties with index sequence.
......@@ -1042,8 +1051,14 @@ func typeFields(t reflect.Type) []field {
if name == "" {
name = sf.Name
}
fields = append(fields, field{name, tagged, index, ft,
opts.Contains("omitempty"), opts.Contains("string")})
fields = append(fields, fillField(field{
name: name,
tag: tagged,
index: index,
typ: ft,
omitEmpty: opts.Contains("omitempty"),
quoted: opts.Contains("string"),
}))
if count[f.typ] > 1 {
// If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate.
......@@ -1057,7 +1072,7 @@ func typeFields(t reflect.Type) []field {
// Record new anonymous struct to explore in next round.
nextCount[ft]++
if nextCount[ft] == 1 {
next = append(next, field{name: ft.Name(), index: index, typ: ft})
next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft}))
}
}
}
......
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"bytes"
"unicode/utf8"
)
const (
caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
kelvin = '\u212a'
smallLongEss = '\u017f'
)
// foldFunc returns one of four different case folding equivalence
// functions, from most general (and slow) to fastest:
//
// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
// 3) asciiEqualFold, no special, but includes non-letters (including _)
// 4) simpleLetterEqualFold, no specials, no non-letters.
//
// The letters S and K are special because they map to 3 runes, not just 2:
// * S maps to s and to U+017F 'ſ' Latin small letter long s
// * k maps to K and to U+212A 'K' Kelvin sign
// See http://play.golang.org/p/tTxjOc0OGo
//
// The returned function is specialized for matching against s and
// should only be given s. It's not curried for performance reasons.
func foldFunc(s []byte) func(s, t []byte) bool {
nonLetter := false
special := false // special letter
for _, b := range s {
if b >= utf8.RuneSelf {
return bytes.EqualFold
}
upper := b & caseMask
if upper < 'A' || upper > 'Z' {
nonLetter = true
} else if upper == 'K' || upper == 'S' {
// See above for why these letters are special.
special = true
}
}
if special {
return equalFoldRight
}
if nonLetter {
return asciiEqualFold
}
return simpleLetterEqualFold
}
// equalFoldRight is a specialization of bytes.EqualFold when s is
// known to be all ASCII (including punctuation), but contains an 's',
// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
// See comments on foldFunc.
func equalFoldRight(s, t []byte) bool {
for _, sb := range s {
if len(t) == 0 {
return false
}
tb := t[0]
if tb < utf8.RuneSelf {
if sb != tb {
sbUpper := sb & caseMask
if 'A' <= sbUpper && sbUpper <= 'Z' {
if sbUpper != tb&caseMask {
return false
}
} else {
return false
}
}
t = t[1:]
continue
}
// sb is ASCII and t is not. t must be either kelvin
// sign or long s; sb must be s, S, k, or K.
tr, size := utf8.DecodeRune(t)
switch sb {
case 's', 'S':
if tr != smallLongEss {
return false
}
case 'k', 'K':
if tr != kelvin {
return false
}
default:
return false
}
t = t[size:]
}
if len(t) > 0 {
return false
}
return true
}
// asciiEqualFold is a specialization of bytes.EqualFold for use when
// s is all ASCII (but may contain non-letters) and contains no
// special-folding letters.
// See comments on foldFunc.
func asciiEqualFold(s, t []byte) bool {
if len(s) != len(t) {
return false
}
for i, sb := range s {
tb := t[i]
if sb == tb {
continue
}
if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
if sb&caseMask != tb&caseMask {
return false
}
} else {
return false
}
}
return true
}
// simpleLetterEqualFold is a specialization of bytes.EqualFold for
// use when s is all ASCII letters (no underscores, etc) and also
// doesn't contain 'k', 'K', 's', or 'S'.
// See comments on foldFunc.
func simpleLetterEqualFold(s, t []byte) bool {
if len(s) != len(t) {
return false
}
for i, b := range s {
if b&caseMask != t[i]&caseMask {
return false
}
}
return true
}
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"bytes"
"strings"
"testing"
"unicode/utf8"
)
var foldTests = []struct {
fn func(s, t []byte) bool
s, t string
want bool
}{
{equalFoldRight, "", "", true},
{equalFoldRight, "a", "a", true},
{equalFoldRight, "", "a", false},
{equalFoldRight, "a", "", false},
{equalFoldRight, "a", "A", true},
{equalFoldRight, "AB", "ab", true},
{equalFoldRight, "AB", "ac", false},
{equalFoldRight, "sbkKc", "ſbKKc", true},
{equalFoldRight, "SbKkc", "ſbKKc", true},
{equalFoldRight, "SbKkc", "ſbKK", false},
{equalFoldRight, "e", "é", false},
{equalFoldRight, "s", "S", true},
{simpleLetterEqualFold, "", "", true},
{simpleLetterEqualFold, "abc", "abc", true},
{simpleLetterEqualFold, "abc", "ABC", true},
{simpleLetterEqualFold, "abc", "ABCD", false},
{simpleLetterEqualFold, "abc", "xxx", false},
{asciiEqualFold, "a_B", "A_b", true},
{asciiEqualFold, "aa@", "aa`", false}, // verify 0x40 and 0x60 aren't case-equivalent
}
func TestFold(t *testing.T) {
for i, tt := range foldTests {
if got := tt.fn([]byte(tt.s), []byte(tt.t)); got != tt.want {
t.Errorf("%d. %q, %q = %v; want %v", i, tt.s, tt.t, got, tt.want)
}
truth := strings.EqualFold(tt.s, tt.t)
if truth != tt.want {
t.Errorf("strings.EqualFold doesn't agree with case %d", i)
}
}
}
func TestFoldAgainstUnicode(t *testing.T) {
const bufSize = 5
buf1 := make([]byte, 0, bufSize)
buf2 := make([]byte, 0, bufSize)
var runes []rune
for i := 0x20; i <= 0x7f; i++ {
runes = append(runes, rune(i))
}
runes = append(runes, kelvin, smallLongEss)
funcs := []struct {
name string
fold func(s, t []byte) bool
letter bool // must be ASCII letter
simple bool // must be simple ASCII letter (not 'S' or 'K')
}{
{
name: "equalFoldRight",
fold: equalFoldRight,
},
{
name: "asciiEqualFold",
fold: asciiEqualFold,
simple: true,
},
{
name: "simpleLetterEqualFold",
fold: simpleLetterEqualFold,
simple: true,
letter: true,
},
}
for _, ff := range funcs {
for _, r := range runes {
if r >= utf8.RuneSelf {
continue
}
if ff.letter && !isASCIILetter(byte(r)) {
continue
}
if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') {
continue
}
for _, r2 := range runes {
buf1 := append(buf1[:0], 'x')
buf2 := append(buf2[:0], 'x')
buf1 = buf1[:1+utf8.EncodeRune(buf1[1:bufSize], r)]
buf2 = buf2[:1+utf8.EncodeRune(buf2[1:bufSize], r2)]
buf1 = append(buf1, 'x')
buf2 = append(buf2, 'x')
want := bytes.EqualFold(buf1, buf2)
if got := ff.fold(buf1, buf2); got != want {
t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want)
}
}
}
}
}
func isASCIILetter(b byte) bool {
return ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z')
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment