Commit eee07a8e authored by Filippo Valsorda, committed by Andrew Bonventre

Revert "encoding/json: avoid work when unquoting strings"

This reverts CL 151157.

CL 151157 introduced a crash when decoding into ",string" fields. It
came with a moderate speedup, so at this stage of the release cycle
let's just revert it, and reapply it in Go 1.14 with the fix in CL 190659.

Also applied the test cases from CL 190659.

Updates #33728

Change-Id: Ie46e2bc15224b251888580daf6b79d5865f3878e
Reviewed-on: https://go-review.googlesource.com/c/go/+/190909
Run-TryBot: Andrew Bonventre <andybons@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Andrew Bonventre <andybons@golang.org>
parent c61c29fe
...@@ -272,9 +272,6 @@ type decodeState struct { ...@@ -272,9 +272,6 @@ type decodeState struct {
savedError error savedError error
useNumber bool useNumber bool
disallowUnknownFields bool disallowUnknownFields bool
// safeUnquote is the number of current string literal bytes that don't
// need to be unquoted. When negative, no bytes need unquoting.
safeUnquote int
} }
// readIndex returns the position of the last byte read. // readIndex returns the position of the last byte read.
...@@ -376,27 +373,13 @@ func (d *decodeState) rescanLiteral() { ...@@ -376,27 +373,13 @@ func (d *decodeState) rescanLiteral() {
Switch: Switch:
switch data[i-1] { switch data[i-1] {
case '"': // string case '"': // string
// safeUnquote is initialized at -1, which means that all bytes
// checked so far can be unquoted at a later time with no work
// at all. When reaching the closing '"', if safeUnquote is
// still -1, all bytes can be unquoted with no work. Otherwise,
// only those bytes up until the first '\\' or non-ascii rune
// can be safely unquoted.
safeUnquote := -1
for ; i < len(data); i++ { for ; i < len(data); i++ {
if c := data[i]; c == '\\' { switch data[i] {
if safeUnquote < 0 { // first unsafe byte case '\\':
safeUnquote = int(i - d.off)
}
i++ // escaped char i++ // escaped char
} else if c == '"' { case '"':
d.safeUnquote = safeUnquote
i++ // tokenize the closing quote too i++ // tokenize the closing quote too
break Switch break Switch
} else if c >= utf8.RuneSelf {
if safeUnquote < 0 { // first unsafe byte
safeUnquote = int(i - d.off)
}
} }
} }
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number
...@@ -749,7 +732,7 @@ func (d *decodeState) object(v reflect.Value) error { ...@@ -749,7 +732,7 @@ func (d *decodeState) object(v reflect.Value) error {
start := d.readIndex() start := d.readIndex()
d.rescanLiteral() d.rescanLiteral()
item := d.data[start:d.readIndex()] item := d.data[start:d.readIndex()]
key, ok := d.unquoteBytes(item) key, ok := unquoteBytes(item)
if !ok { if !ok {
panic(phasePanicMsg) panic(phasePanicMsg)
} }
...@@ -950,7 +933,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -950,7 +933,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
return nil return nil
} }
s, ok := d.unquoteBytes(item) s, ok := unquoteBytes(item)
if !ok { if !ok {
if fromQuoted { if fromQuoted {
return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
...@@ -1001,7 +984,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool ...@@ -1001,7 +984,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
} }
case '"': // string case '"': // string
s, ok := d.unquoteBytes(item) s, ok := unquoteBytes(item)
if !ok { if !ok {
if fromQuoted { if fromQuoted {
return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
...@@ -1159,7 +1142,7 @@ func (d *decodeState) objectInterface() map[string]interface{} { ...@@ -1159,7 +1142,7 @@ func (d *decodeState) objectInterface() map[string]interface{} {
start := d.readIndex() start := d.readIndex()
d.rescanLiteral() d.rescanLiteral()
item := d.data[start:d.readIndex()] item := d.data[start:d.readIndex()]
key, ok := d.unquote(item) key, ok := unquote(item)
if !ok { if !ok {
panic(phasePanicMsg) panic(phasePanicMsg)
} }
...@@ -1208,7 +1191,7 @@ func (d *decodeState) literalInterface() interface{} { ...@@ -1208,7 +1191,7 @@ func (d *decodeState) literalInterface() interface{} {
return c == 't' return c == 't'
case '"': // string case '"': // string
s, ok := d.unquote(item) s, ok := unquote(item)
if !ok { if !ok {
panic(phasePanicMsg) panic(phasePanicMsg)
} }
...@@ -1251,21 +1234,38 @@ func getu4(s []byte) rune { ...@@ -1251,21 +1234,38 @@ func getu4(s []byte) rune {
// unquote converts a quoted JSON string literal s into an actual string t. // unquote converts a quoted JSON string literal s into an actual string t.
// The rules are different than for Go, so cannot use strconv.Unquote. // The rules are different than for Go, so cannot use strconv.Unquote.
func (d *decodeState) unquote(s []byte) (t string, ok bool) { func unquote(s []byte) (t string, ok bool) {
s, ok = d.unquoteBytes(s) s, ok = unquoteBytes(s)
t = string(s) t = string(s)
return return
} }
func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) { func unquoteBytes(s []byte) (t []byte, ok bool) {
r := d.safeUnquote if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
// The bytes have been scanned, so we know that the first and last bytes return
// are double quotes. }
s = s[1 : len(s)-1] s = s[1 : len(s)-1]
// If there are no unusual characters, no unquoting is needed, so return // Check for unusual characters. If there are none,
// a slice of the original bytes. // then no unquoting is needed, so return a slice of the
if r == -1 { // original bytes.
r := 0
for r < len(s) {
c := s[r]
if c == '\\' || c == '"' || c < ' ' {
break
}
if c < utf8.RuneSelf {
r++
continue
}
rr, size := utf8.DecodeRune(s[r:])
if rr == utf8.RuneError && size == 1 {
break
}
r += size
}
if r == len(s) {
return s, true return s, true
} }
......
...@@ -1250,6 +1250,8 @@ var wrongStringTests = []wrongStringTest{ ...@@ -1250,6 +1250,8 @@ var wrongStringTests = []wrongStringTest{
{`{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`}, {`{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`},
{`{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`}, {`{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`},
{`{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`}, {`{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`},
{`{"result":"\""}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"" into string`},
{`{"result":"\"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"foo" into string`},
} }
// If people misuse the ,string modifier, the error message should be // If people misuse the ,string modifier, the error message should be
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment