Commit 5d118386 authored by Pascal S. de Kloe's avatar Pascal S. de Kloe Committed by Brad Fitzpatrick

encoding/json: read ahead after value consumption

Eliminates the need for an extra scanner, read undo and some other tricks.

name                    old time/op    new time/op    delta
CodeEncoder-12            1.92ms ± 0%    1.91ms ± 1%   -0.65%  (p=0.000 n=17+20)
CodeMarshal-12            2.13ms ± 2%    2.12ms ± 1%   -0.49%  (p=0.038 n=18+17)
CodeDecoder-12            8.55ms ± 2%    8.49ms ± 1%     ~     (p=0.119 n=20+18)
UnicodeDecoder-12          411ns ± 0%     422ns ± 0%   +2.77%  (p=0.000 n=19+15)
DecoderStream-12           320ns ± 1%     307ns ± 1%   -3.80%  (p=0.000 n=18+20)
CodeUnmarshal-12          9.65ms ± 3%    9.58ms ± 3%     ~     (p=0.157 n=20+20)
CodeUnmarshalReuse-12     8.54ms ± 3%    8.56ms ± 2%     ~     (p=0.602 n=20+20)
UnmarshalString-12         110ns ± 1%      87ns ± 2%  -21.53%  (p=0.000 n=16+20)
UnmarshalFloat64-12        101ns ± 1%      77ns ± 2%  -23.08%  (p=0.000 n=19+20)
UnmarshalInt64-12         94.5ns ± 2%    68.4ns ± 1%  -27.60%  (p=0.000 n=20+20)
Issue10335-12              128ns ± 1%     100ns ± 1%  -21.89%  (p=0.000 n=19+18)
Unmapped-12                427ns ± 3%     247ns ± 4%  -42.17%  (p=0.000 n=20+20)
NumberIsValid-12          23.0ns ± 0%    21.7ns ± 0%   -5.73%  (p=0.000 n=20+20)
NumberIsValidRegexp-12     641ns ± 0%     642ns ± 0%   +0.15%  (p=0.003 n=19+19)
EncoderEncode-12          56.9ns ± 0%    55.0ns ± 1%   -3.32%  (p=0.012 n=2+17)

name                    old speed      new speed      delta
CodeEncoder-12          1.01GB/s ± 1%  1.02GB/s ± 1%   +0.71%  (p=0.000 n=18+20)
CodeMarshal-12           913MB/s ± 2%   917MB/s ± 1%   +0.49%  (p=0.038 n=18+17)
CodeDecoder-12           227MB/s ± 2%   229MB/s ± 1%     ~     (p=0.110 n=20+18)
UnicodeDecoder-12       34.1MB/s ± 0%  33.1MB/s ± 0%   -2.73%  (p=0.000 n=19+19)
CodeUnmarshal-12         201MB/s ± 3%   203MB/s ± 3%     ~     (p=0.151 n=20+20)

name                    old alloc/op   new alloc/op   delta
Issue10335-12               320B ± 0%      184B ± 0%  -42.50%  (p=0.000 n=20+20)
Unmapped-12                 568B ± 0%      216B ± 0%  -61.97%  (p=0.000 n=20+20)
EncoderEncode-12           0.00B          0.00B          ~     (all equal)

name                    old allocs/op  new allocs/op  delta
Issue10335-12               4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.000 n=20+20)
Unmapped-12                 18.0 ± 0%       4.0 ± 0%  -77.78%  (p=0.000 n=20+20)
EncoderEncode-12            0.00           0.00          ~     (all equal)

Fixes #17914
Updates #20693
Updates #10335

Change-Id: I0459a52febb8b79c9a2991e69ed2614cf8740429
Reviewed-on: https://go-review.googlesource.com/47152Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent c15984c6
...@@ -189,6 +189,7 @@ func (d *decodeState) unmarshal(v interface{}) (err error) { ...@@ -189,6 +189,7 @@ func (d *decodeState) unmarshal(v interface{}) (err error) {
} }
d.scan.reset() d.scan.reset()
d.scanWhile(scanSkipSpace)
// We decode rv not rv.Elem because the Unmarshaler interface // We decode rv not rv.Elem because the Unmarshaler interface
// test must be applied at the top level of the value. // test must be applied at the top level of the value.
d.value(rv) d.value(rv)
...@@ -274,9 +275,9 @@ func isValidNumber(s string) bool { ...@@ -274,9 +275,9 @@ func isValidNumber(s string) bool {
// decodeState represents the state while decoding a JSON value. // decodeState represents the state while decoding a JSON value.
type decodeState struct { type decodeState struct {
data []byte data []byte
off int // read offset in data off int // next read offset in data
opcode int // last read result
scan scanner scan scanner
nextscan scanner // for calls to nextValue
errorContext struct { // provides context for type errors errorContext struct { // provides context for type errors
Struct string Struct string
Field string Field string
...@@ -286,6 +287,11 @@ type decodeState struct { ...@@ -286,6 +287,11 @@ type decodeState struct {
disallowUnknownFields bool disallowUnknownFields bool
} }
// readIndex returns the position of the last byte read.
func (d *decodeState) readIndex() int {
return d.off - 1
}
// errPhase is used for errors that should not happen unless // errPhase is used for errors that should not happen unless
// there is a bug in the JSON decoder or something is editing // there is a bug in the JSON decoder or something is editing
// the data slice while the decoder executes. // the data slice while the decoder executes.
...@@ -326,94 +332,83 @@ func (d *decodeState) addErrorContext(err error) error { ...@@ -326,94 +332,83 @@ func (d *decodeState) addErrorContext(err error) error {
return err return err
} }
// next cuts off and returns the next full JSON value in d.data[d.off:]. // skip scans to the end of what was started.
// The next value is known to be an object or array, not a literal. func (d *decodeState) skip() {
func (d *decodeState) next() []byte { s, data, i := &d.scan, d.data, d.off
c := d.data[d.off] depth := len(s.parseState)
item, rest, err := nextValue(d.data[d.off:], &d.nextscan) for {
if err != nil { op := s.step(s, data[i])
d.error(err) i++
if len(s.parseState) < depth {
d.off = i
d.opcode = op
return
}
} }
d.off = len(d.data) - len(rest) }
// Our scanner has seen the opening brace/bracket // scanNext processes the byte at d.data[d.off].
// and thinks we're still in the middle of the object. func (d *decodeState) scanNext() {
// invent a closing brace/bracket to get it out. s, data, i := &d.scan, d.data, d.off
if c == '{' { if i < len(data) {
d.scan.step(&d.scan, '}') d.opcode = s.step(s, data[i])
d.off = i + 1
} else { } else {
d.scan.step(&d.scan, ']') d.opcode = s.eof()
d.off = len(data) + 1 // mark processed EOF with len+1
} }
return item
} }
// scanWhile processes bytes in d.data[d.off:] until it // scanWhile processes bytes in d.data[d.off:] until it
// receives a scan code not equal to op. // receives a scan code not equal to op.
// It updates d.off and returns the new scan code. func (d *decodeState) scanWhile(op int) {
func (d *decodeState) scanWhile(op int) int { s, data, i := &d.scan, d.data, d.off
var newOp int for i < len(d.data) {
for { newOp := s.step(s, data[i])
if d.off >= len(d.data) { i++
newOp = d.scan.eof()
d.off = len(d.data) + 1 // mark processed EOF with len+1
} else {
c := d.data[d.off]
d.off++
newOp = d.scan.step(&d.scan, c)
}
if newOp != op { if newOp != op {
break d.opcode = newOp
d.off = i
return
} }
} }
return newOp
d.off = len(d.data) + 1 // mark processed EOF with len+1
d.opcode = d.scan.eof()
} }
// value decodes a JSON value from d.data[d.off:] into the value. // value consumes a JSON value from d.data[d.off-1:], decoding into v, and
// it updates d.off to point past the decoded value. // reads the following byte ahead. If v is invalid, the value is discarded.
// The first byte of the value has been read already.
func (d *decodeState) value(v reflect.Value) { func (d *decodeState) value(v reflect.Value) {
if !v.IsValid() { switch d.opcode {
_, rest, err := nextValue(d.data[d.off:], &d.nextscan)
if err != nil {
d.error(err)
}
d.off = len(d.data) - len(rest)
// d.scan thinks we're still at the beginning of the item.
// Feed in an empty string - the shortest, simplest value -
// so that it knows we got to the end of the value.
if d.scan.redo {
// rewind.
d.scan.redo = false
d.scan.step = stateBeginValue
}
d.scan.step(&d.scan, '"')
d.scan.step(&d.scan, '"')
n := len(d.scan.parseState)
if n > 0 && d.scan.parseState[n-1] == parseObjectKey {
// d.scan thinks we just read an object key; finish the object
d.scan.step(&d.scan, ':')
d.scan.step(&d.scan, '"')
d.scan.step(&d.scan, '"')
d.scan.step(&d.scan, '}')
}
return
}
switch op := d.scanWhile(scanSkipSpace); op {
default: default:
d.error(errPhase) d.error(errPhase)
case scanBeginArray: case scanBeginArray:
d.array(v) if v.IsValid() {
d.array(v)
} else {
d.skip()
}
d.scanNext()
case scanBeginObject: case scanBeginObject:
d.object(v) if v.IsValid() {
d.object(v)
} else {
d.skip()
}
d.scanNext()
case scanBeginLiteral: case scanBeginLiteral:
d.literal(v) // All bytes inside literal return scanContinue op code.
start := d.readIndex()
d.scanWhile(scanContinue)
if v.IsValid() {
d.literalStore(d.data[start:d.readIndex()], v, false)
}
} }
} }
...@@ -424,15 +419,17 @@ type unquotedValue struct{} ...@@ -424,15 +419,17 @@ type unquotedValue struct{}
// If it finds anything other than a quoted string literal or null, // If it finds anything other than a quoted string literal or null,
// valueQuoted returns unquotedValue{}. // valueQuoted returns unquotedValue{}.
func (d *decodeState) valueQuoted() interface{} { func (d *decodeState) valueQuoted() interface{} {
switch op := d.scanWhile(scanSkipSpace); op { switch d.opcode {
default: default:
d.error(errPhase) d.error(errPhase)
case scanBeginArray: case scanBeginArray:
d.array(reflect.Value{}) d.skip()
d.scanNext()
case scanBeginObject: case scanBeginObject:
d.object(reflect.Value{}) d.skip()
d.scanNext()
case scanBeginLiteral: case scanBeginLiteral:
switch v := d.literalInterface().(type) { switch v := d.literalInterface().(type) {
...@@ -447,7 +444,7 @@ func (d *decodeState) valueQuoted() interface{} { ...@@ -447,7 +444,7 @@ func (d *decodeState) valueQuoted() interface{} {
// until it gets to a non-pointer. // until it gets to a non-pointer.
// if it encounters an Unmarshaler, indirect stops and returns that. // if it encounters an Unmarshaler, indirect stops and returns that.
// if decodingNull is true, indirect stops at the last pointer so it can be set to nil. // if decodingNull is true, indirect stops at the last pointer so it can be set to nil.
func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) {
// Issue #24153 indicates that it is generally not a guaranteed property // Issue #24153 indicates that it is generally not a guaranteed property
// that you may round-trip a reflect.Value by calling Value.Addr().Elem() // that you may round-trip a reflect.Value by calling Value.Addr().Elem()
// and expect the value to still be settable for values derived from // and expect the value to still be settable for values derived from
...@@ -512,14 +509,15 @@ func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler, ...@@ -512,14 +509,15 @@ func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler,
return nil, nil, v return nil, nil, v
} }
// array consumes an array from d.data[d.off-1:], decoding into the value v. // array consumes an array from d.data[d.off-1:], decoding into v.
// the first byte of the array ('[') has been read already. // The first byte of the array ('[') has been read already.
func (d *decodeState) array(v reflect.Value) { func (d *decodeState) array(v reflect.Value) {
// Check for unmarshaler. // Check for unmarshaler.
u, ut, pv := d.indirect(v, false) u, ut, pv := indirect(v, false)
if u != nil { if u != nil {
d.off-- start := d.readIndex()
err := u.UnmarshalJSON(d.next()) d.skip()
err := u.UnmarshalJSON(d.data[start:d.off])
if err != nil { if err != nil {
d.error(err) d.error(err)
} }
...@@ -527,11 +525,9 @@ func (d *decodeState) array(v reflect.Value) { ...@@ -527,11 +525,9 @@ func (d *decodeState) array(v reflect.Value) {
} }
if ut != nil { if ut != nil {
d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)})
d.off-- d.skip()
d.next()
return return
} }
v = pv v = pv
// Check type of target. // Check type of target.
...@@ -546,8 +542,7 @@ func (d *decodeState) array(v reflect.Value) { ...@@ -546,8 +542,7 @@ func (d *decodeState) array(v reflect.Value) {
fallthrough fallthrough
default: default:
d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)})
d.off-- d.skip()
d.next()
return return
case reflect.Array: case reflect.Array:
case reflect.Slice: case reflect.Slice:
...@@ -557,15 +552,11 @@ func (d *decodeState) array(v reflect.Value) { ...@@ -557,15 +552,11 @@ func (d *decodeState) array(v reflect.Value) {
i := 0 i := 0
for { for {
// Look ahead for ] - can only happen on first iteration. // Look ahead for ] - can only happen on first iteration.
op := d.scanWhile(scanSkipSpace) d.scanWhile(scanSkipSpace)
if op == scanEndArray { if d.opcode == scanEndArray {
break break
} }
// Back up so d.value can have the byte we just read.
d.off--
d.scan.undo(op)
// Get element of array, growing if necessary. // Get element of array, growing if necessary.
if v.Kind() == reflect.Slice { if v.Kind() == reflect.Slice {
// Grow slice if necessary // Grow slice if necessary
...@@ -593,11 +584,13 @@ func (d *decodeState) array(v reflect.Value) { ...@@ -593,11 +584,13 @@ func (d *decodeState) array(v reflect.Value) {
i++ i++
// Next token must be , or ]. // Next token must be , or ].
op = d.scanWhile(scanSkipSpace) if d.opcode == scanSkipSpace {
if op == scanEndArray { d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndArray {
break break
} }
if op != scanArrayValue { if d.opcode != scanArrayValue {
d.error(errPhase) d.error(errPhase)
} }
} }
...@@ -621,14 +614,15 @@ func (d *decodeState) array(v reflect.Value) { ...@@ -621,14 +614,15 @@ func (d *decodeState) array(v reflect.Value) {
var nullLiteral = []byte("null") var nullLiteral = []byte("null")
var textUnmarshalerType = reflect.TypeOf(new(encoding.TextUnmarshaler)).Elem() var textUnmarshalerType = reflect.TypeOf(new(encoding.TextUnmarshaler)).Elem()
// object consumes an object from d.data[d.off-1:], decoding into the value v. // object consumes an object from d.data[d.off-1:], decoding into v.
// the first byte ('{') of the object has been read already. // The first byte ('{') of the object has been read already.
func (d *decodeState) object(v reflect.Value) { func (d *decodeState) object(v reflect.Value) {
// Check for unmarshaler. // Check for unmarshaler.
u, ut, pv := d.indirect(v, false) u, ut, pv := indirect(v, false)
if u != nil { if u != nil {
d.off-- start := d.readIndex()
err := u.UnmarshalJSON(d.next()) d.skip()
err := u.UnmarshalJSON(d.data[start:d.off])
if err != nil { if err != nil {
d.error(err) d.error(err)
} }
...@@ -636,8 +630,7 @@ func (d *decodeState) object(v reflect.Value) { ...@@ -636,8 +630,7 @@ func (d *decodeState) object(v reflect.Value) {
} }
if ut != nil { if ut != nil {
d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)})
d.off-- d.skip()
d.next() // skip over { } in input
return return
} }
v = pv v = pv
...@@ -664,8 +657,7 @@ func (d *decodeState) object(v reflect.Value) { ...@@ -664,8 +657,7 @@ func (d *decodeState) object(v reflect.Value) {
default: default:
if !reflect.PtrTo(t.Key()).Implements(textUnmarshalerType) { if !reflect.PtrTo(t.Key()).Implements(textUnmarshalerType) {
d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)})
d.off-- d.skip()
d.next() // skip over { } in input
return return
} }
} }
...@@ -676,8 +668,7 @@ func (d *decodeState) object(v reflect.Value) { ...@@ -676,8 +668,7 @@ func (d *decodeState) object(v reflect.Value) {
// ok // ok
default: default:
d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)})
d.off-- d.skip()
d.next() // skip over { } in input
return return
} }
...@@ -685,19 +676,19 @@ func (d *decodeState) object(v reflect.Value) { ...@@ -685,19 +676,19 @@ func (d *decodeState) object(v reflect.Value) {
for { for {
// Read opening " of string key or closing }. // Read opening " of string key or closing }.
op := d.scanWhile(scanSkipSpace) d.scanWhile(scanSkipSpace)
if op == scanEndObject { if d.opcode == scanEndObject {
// closing } - can only happen on first iteration. // closing } - can only happen on first iteration.
break break
} }
if op != scanBeginLiteral { if d.opcode != scanBeginLiteral {
d.error(errPhase) d.error(errPhase)
} }
// Read key. // Read key.
start := d.off - 1 start := d.readIndex()
op = d.scanWhile(scanContinue) d.scanWhile(scanContinue)
item := d.data[start : d.off-1] item := d.data[start:d.readIndex()]
key, ok := unquoteBytes(item) key, ok := unquoteBytes(item)
if !ok { if !ok {
d.error(errPhase) d.error(errPhase)
...@@ -761,12 +752,13 @@ func (d *decodeState) object(v reflect.Value) { ...@@ -761,12 +752,13 @@ func (d *decodeState) object(v reflect.Value) {
} }
// Read : before value. // Read : before value.
if op == scanSkipSpace { if d.opcode == scanSkipSpace {
op = d.scanWhile(scanSkipSpace) d.scanWhile(scanSkipSpace)
} }
if op != scanObjectKey { if d.opcode != scanObjectKey {
d.error(errPhase) d.error(errPhase)
} }
d.scanWhile(scanSkipSpace)
if destring { if destring {
switch qv := d.valueQuoted().(type) { switch qv := d.valueQuoted().(type) {
...@@ -819,11 +811,13 @@ func (d *decodeState) object(v reflect.Value) { ...@@ -819,11 +811,13 @@ func (d *decodeState) object(v reflect.Value) {
} }
// Next token must be , or }. // Next token must be , or }.
op = d.scanWhile(scanSkipSpace) if d.opcode == scanSkipSpace {
if op == scanEndObject { d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndObject {
break break
} }
if op != scanObjectValue { if d.opcode != scanObjectValue {
d.error(errPhase) d.error(errPhase)
} }
...@@ -832,21 +826,6 @@ func (d *decodeState) object(v reflect.Value) { ...@@ -832,21 +826,6 @@ func (d *decodeState) object(v reflect.Value) {
} }
} }
// literal consumes a literal from d.data[d.off-1:], decoding into the value v.
// The first byte of the literal has been read already
// (that's how the caller knows it's a literal).
func (d *decodeState) literal(v reflect.Value) {
// All bytes inside literal return scanContinue op code.
start := d.off - 1
op := d.scanWhile(scanContinue)
// Scan read one byte too far; back up.
d.off--
d.scan.undo(op)
d.literalStore(d.data[start:d.off], v, false)
}
// convertNumber converts the number literal s to a float64 or a Number // convertNumber converts the number literal s to a float64 or a Number
// depending on the setting of d.useNumber. // depending on the setting of d.useNumber.
func (d *decodeState) convertNumber(s string) (interface{}, error) { func (d *decodeState) convertNumber(s string) (interface{}, error) {
...@@ -875,7 +854,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -875,7 +854,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
return return
} }
isNull := item[0] == 'n' // null isNull := item[0] == 'n' // null
u, ut, pv := d.indirect(v, isNull) u, ut, pv := indirect(v, isNull)
if u != nil { if u != nil {
err := u.UnmarshalJSON(item) err := u.UnmarshalJSON(item)
if err != nil { if err != nil {
...@@ -897,7 +876,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -897,7 +876,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
default: default:
val = "number" val = "number"
} }
d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
} }
return return
} }
...@@ -944,7 +923,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -944,7 +923,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
if fromQuoted { if fromQuoted {
d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
} else { } else {
d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())})
} }
case reflect.Bool: case reflect.Bool:
v.SetBool(value) v.SetBool(value)
...@@ -952,7 +931,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -952,7 +931,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
if v.NumMethod() == 0 { if v.NumMethod() == 0 {
v.Set(reflect.ValueOf(value)) v.Set(reflect.ValueOf(value))
} else { } else {
d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())})
} }
} }
...@@ -967,10 +946,10 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -967,10 +946,10 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
} }
switch v.Kind() { switch v.Kind() {
default: default:
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
case reflect.Slice: case reflect.Slice:
if v.Type().Elem().Kind() != reflect.Uint8 { if v.Type().Elem().Kind() != reflect.Uint8 {
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
break break
} }
b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
...@@ -986,7 +965,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -986,7 +965,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
if v.NumMethod() == 0 { if v.NumMethod() == 0 {
v.Set(reflect.ValueOf(string(s))) v.Set(reflect.ValueOf(string(s)))
} else { } else {
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
} }
} }
...@@ -1011,7 +990,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -1011,7 +990,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
if fromQuoted { if fromQuoted {
d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
} else { } else {
d.error(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.off)}) d.error(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())})
} }
case reflect.Interface: case reflect.Interface:
n, err := d.convertNumber(s) n, err := d.convertNumber(s)
...@@ -1020,7 +999,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -1020,7 +999,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
break break
} }
if v.NumMethod() != 0 { if v.NumMethod() != 0 {
d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())})
break break
} }
v.Set(reflect.ValueOf(n)) v.Set(reflect.ValueOf(n))
...@@ -1028,7 +1007,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -1028,7 +1007,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
n, err := strconv.ParseInt(s, 10, 64) n, err := strconv.ParseInt(s, 10, 64)
if err != nil || v.OverflowInt(n) { if err != nil || v.OverflowInt(n) {
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
break break
} }
v.SetInt(n) v.SetInt(n)
...@@ -1036,7 +1015,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -1036,7 +1015,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
n, err := strconv.ParseUint(s, 10, 64) n, err := strconv.ParseUint(s, 10, 64)
if err != nil || v.OverflowUint(n) { if err != nil || v.OverflowUint(n) {
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
break break
} }
v.SetUint(n) v.SetUint(n)
...@@ -1044,7 +1023,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -1044,7 +1023,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
case reflect.Float32, reflect.Float64: case reflect.Float32, reflect.Float64:
n, err := strconv.ParseFloat(s, v.Type().Bits()) n, err := strconv.ParseFloat(s, v.Type().Bits())
if err != nil || v.OverflowFloat(n) { if err != nil || v.OverflowFloat(n) {
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
break break
} }
v.SetFloat(n) v.SetFloat(n)
...@@ -1057,18 +1036,21 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -1057,18 +1036,21 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
// but they avoid the weight of reflection in this common case. // but they avoid the weight of reflection in this common case.
// valueInterface is like value but returns interface{} // valueInterface is like value but returns interface{}
func (d *decodeState) valueInterface() interface{} { func (d *decodeState) valueInterface() (val interface{}) {
switch d.scanWhile(scanSkipSpace) { switch d.opcode {
default: default:
d.error(errPhase) d.error(errPhase)
panic("unreachable") panic("unreachable")
case scanBeginArray: case scanBeginArray:
return d.arrayInterface() val = d.arrayInterface()
d.scanNext()
case scanBeginObject: case scanBeginObject:
return d.objectInterface() val = d.objectInterface()
d.scanNext()
case scanBeginLiteral: case scanBeginLiteral:
return d.literalInterface() val = d.literalInterface()
} }
return
} }
// arrayInterface is like array but returns []interface{}. // arrayInterface is like array but returns []interface{}.
...@@ -1076,23 +1058,21 @@ func (d *decodeState) arrayInterface() []interface{} { ...@@ -1076,23 +1058,21 @@ func (d *decodeState) arrayInterface() []interface{} {
var v = make([]interface{}, 0) var v = make([]interface{}, 0)
for { for {
// Look ahead for ] - can only happen on first iteration. // Look ahead for ] - can only happen on first iteration.
op := d.scanWhile(scanSkipSpace) d.scanWhile(scanSkipSpace)
if op == scanEndArray { if d.opcode == scanEndArray {
break break
} }
// Back up so d.value can have the byte we just read.
d.off--
d.scan.undo(op)
v = append(v, d.valueInterface()) v = append(v, d.valueInterface())
// Next token must be , or ]. // Next token must be , or ].
op = d.scanWhile(scanSkipSpace) if d.opcode == scanSkipSpace {
if op == scanEndArray { d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndArray {
break break
} }
if op != scanArrayValue { if d.opcode != scanArrayValue {
d.error(errPhase) d.error(errPhase)
} }
} }
...@@ -1104,57 +1084,59 @@ func (d *decodeState) objectInterface() map[string]interface{} { ...@@ -1104,57 +1084,59 @@ func (d *decodeState) objectInterface() map[string]interface{} {
m := make(map[string]interface{}) m := make(map[string]interface{})
for { for {
// Read opening " of string key or closing }. // Read opening " of string key or closing }.
op := d.scanWhile(scanSkipSpace) d.scanWhile(scanSkipSpace)
if op == scanEndObject { if d.opcode == scanEndObject {
// closing } - can only happen on first iteration. // closing } - can only happen on first iteration.
break break
} }
if op != scanBeginLiteral { if d.opcode != scanBeginLiteral {
d.error(errPhase) d.error(errPhase)
} }
// Read string key. // Read string key.
start := d.off - 1 start := d.readIndex()
op = d.scanWhile(scanContinue) d.scanWhile(scanContinue)
item := d.data[start : d.off-1] item := d.data[start:d.readIndex()]
key, ok := unquote(item) key, ok := unquote(item)
if !ok { if !ok {
d.error(errPhase) d.error(errPhase)
} }
// Read : before value. // Read : before value.
if op == scanSkipSpace { if d.opcode == scanSkipSpace {
op = d.scanWhile(scanSkipSpace) d.scanWhile(scanSkipSpace)
} }
if op != scanObjectKey { if d.opcode != scanObjectKey {
d.error(errPhase) d.error(errPhase)
} }
d.scanWhile(scanSkipSpace)
// Read value. // Read value.
m[key] = d.valueInterface() m[key] = d.valueInterface()
// Next token must be , or }. // Next token must be , or }.
op = d.scanWhile(scanSkipSpace) if d.opcode == scanSkipSpace {
if op == scanEndObject { d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndObject {
break break
} }
if op != scanObjectValue { if d.opcode != scanObjectValue {
d.error(errPhase) d.error(errPhase)
} }
} }
return m return m
} }
// literalInterface is like literal but returns an interface value. // literalInterface consumes and returns a literal from d.data[d.off-1:] and
// it reads the following byte ahead. The first byte of the literal has been
// read already (that's how the caller knows it's a literal).
func (d *decodeState) literalInterface() interface{} { func (d *decodeState) literalInterface() interface{} {
// All bytes inside literal return scanContinue op code. // All bytes inside literal return scanContinue op code.
start := d.off - 1 start := d.readIndex()
op := d.scanWhile(scanContinue) d.scanWhile(scanContinue)
// Scan read one byte too far; back up. item := d.data[start:d.readIndex()]
d.off--
d.scan.undo(op)
item := d.data[start:d.off]
switch c := item[0]; c { switch c := item[0]; c {
case 'n': // null case 'n': // null
......
...@@ -8,7 +8,7 @@ package json ...@@ -8,7 +8,7 @@ package json
// Just about at the limit of what is reasonable to write by hand. // Just about at the limit of what is reasonable to write by hand.
// Some parts are a bit tedious, but overall it nicely factors out the // Some parts are a bit tedious, but overall it nicely factors out the
// otherwise common code from the multiple scanning functions // otherwise common code from the multiple scanning functions
// in this package (Compact, Indent, checkValid, nextValue, etc). // in this package (Compact, Indent, checkValid, etc).
// //
// This file starts with two simple examples using the scanner // This file starts with two simple examples using the scanner
// before diving into the scanner itself. // before diving into the scanner itself.
...@@ -36,35 +36,6 @@ func checkValid(data []byte, scan *scanner) error { ...@@ -36,35 +36,6 @@ func checkValid(data []byte, scan *scanner) error {
return nil return nil
} }
// nextValue splits data after the next whole JSON value,
// returning that value and the bytes that follow it as separate slices.
// scan is passed in for use by nextValue to avoid an allocation.
func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) {
scan.reset()
for i, c := range data {
v := scan.step(scan, c)
if v >= scanEndObject {
switch v {
// probe the scanner with a space to determine whether we will
// get scanEnd on the next character. Otherwise, if the next character
// is not a space, scanEndTop allocates a needless error.
case scanEndObject, scanEndArray:
if scan.step(scan, ' ') == scanEnd {
return data[:i+1], data[i+1:], nil
}
case scanError:
return nil, nil, scan.err
case scanEnd:
return data[:i], data[i:], nil
}
}
}
if scan.eof() == scanError {
return nil, nil, scan.err
}
return data, nil, nil
}
// A SyntaxError is a description of a JSON syntax error. // A SyntaxError is a description of a JSON syntax error.
type SyntaxError struct { type SyntaxError struct {
msg string // description of error msg string // description of error
...@@ -101,11 +72,6 @@ type scanner struct { ...@@ -101,11 +72,6 @@ type scanner struct {
// Error that happened, if any. // Error that happened, if any.
err error err error
// 1-byte redo (see undo method)
redo bool
redoCode int
redoState func(*scanner, byte) int
// total bytes consumed, updated by decoder.Decode // total bytes consumed, updated by decoder.Decode
bytes int64 bytes int64
} }
...@@ -151,7 +117,6 @@ func (s *scanner) reset() { ...@@ -151,7 +117,6 @@ func (s *scanner) reset() {
s.step = stateBeginValue s.step = stateBeginValue
s.parseState = s.parseState[0:0] s.parseState = s.parseState[0:0]
s.err = nil s.err = nil
s.redo = false
s.endTop = false s.endTop = false
} }
...@@ -184,7 +149,6 @@ func (s *scanner) pushParseState(p int) { ...@@ -184,7 +149,6 @@ func (s *scanner) pushParseState(p int) {
func (s *scanner) popParseState() { func (s *scanner) popParseState() {
n := len(s.parseState) - 1 n := len(s.parseState) - 1
s.parseState = s.parseState[0:n] s.parseState = s.parseState[0:n]
s.redo = false
if n == 0 { if n == 0 {
s.step = stateEndTop s.step = stateEndTop
s.endTop = true s.endTop = true
...@@ -607,22 +571,3 @@ func quoteChar(c byte) string { ...@@ -607,22 +571,3 @@ func quoteChar(c byte) string {
s := strconv.Quote(string(c)) s := strconv.Quote(string(c))
return "'" + s[1:len(s)-1] + "'" return "'" + s[1:len(s)-1] + "'"
} }
// undo causes the scanner to return scanCode from the next state transition.
// This gives callers a simple 1-byte undo mechanism.
func (s *scanner) undo(scanCode int) {
if s.redo {
panic("json: invalid use of scanner")
}
s.redoCode = scanCode
s.redoState = s.step
s.step = stateRedo
s.redo = true
}
// stateRedo helps implement the scanner's 1-byte undo.
func stateRedo(s *scanner, c byte) int {
s.redo = false
s.step = s.redoState
return s.redoCode
}
...@@ -200,43 +200,6 @@ func TestIndentErrors(t *testing.T) { ...@@ -200,43 +200,6 @@ func TestIndentErrors(t *testing.T) {
} }
} }
func TestNextValueBig(t *testing.T) {
initBig()
var scan scanner
item, rest, err := nextValue(jsonBig, &scan)
if err != nil {
t.Fatalf("nextValue: %s", err)
}
if len(item) != len(jsonBig) || &item[0] != &jsonBig[0] {
t.Errorf("invalid item: %d %d", len(item), len(jsonBig))
}
if len(rest) != 0 {
t.Errorf("invalid rest: %d", len(rest))
}
item, rest, err = nextValue(append(jsonBig, "HELLO WORLD"...), &scan)
if err != nil {
t.Fatalf("nextValue extra: %s", err)
}
if len(item) != len(jsonBig) {
t.Errorf("invalid item: %d %d", len(item), len(jsonBig))
}
if string(rest) != "HELLO WORLD" {
t.Errorf("invalid rest: %d", len(rest))
}
}
var benchScan scanner
func BenchmarkSkipValue(b *testing.B) {
initBig()
b.ResetTimer()
for i := 0; i < b.N; i++ {
nextValue(jsonBig, &benchScan)
}
b.SetBytes(int64(len(jsonBig)))
}
func diff(t *testing.T, a, b []byte) { func diff(t *testing.T, a, b []byte) {
for i := 0; ; i++ { for i := 0; ; i++ {
if i >= len(a) || i >= len(b) || a[i] != b[i] { if i >= len(a) || i >= len(b) || a[i] != b[i] {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment