Commit 5c39341f authored by Kirill Smelkov's avatar Kirill Smelkov

strconv: Teach UnquoteChar to distinguish unexpected EOF from syntax errors

This is needed to get proper error reporting in situations where
UnquoteChar is used to decode an input stream step by step: there, if the
input is a truncated version of a valid character, always returning
ErrSyntax prevents the caller from determining what happened - a real
syntax error or an unexpected end of stream.

Unquote's error behaviour is preserved - it always returns ErrSyntax -
because Unquote operates on the whole input at once, not on a stream, so
anything wrong there really is a syntax error.

Since UnquoteChar is used internally by Unquote, and we already have
Unquote tests that cover all kinds of valid input, we only need to add
tests that cover UnquoteChar's error behaviour. Unfortunately, though, we
cannot easily reuse the existing Unquote error-behaviour tests for this.

P.S.

My original use-case for this change is ogórek, where UnquoteChar is used
to decode Unicode strings encoded in Python pickles:

https://github.com/kisielk/og-rek/blob/c7dbf2e4/ogorek.go#L530

Change-Id: I611e7f5795560da488396bc93135a81a56482b75
parent 79fab70a
......@@ -6,7 +6,10 @@
package strconv
import "unicode/utf8"
import (
"io"
"unicode/utf8"
)
const lowerhex = "0123456789abcdef"
......@@ -250,7 +253,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
// hard case: c is backslash
if len(s) <= 1 {
err = ErrSyntax
err = io.ErrUnexpectedEOF
return
}
c := s[1]
......@@ -283,7 +286,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
}
var v rune
if len(s) < n {
err = ErrSyntax
err = io.ErrUnexpectedEOF
return
}
for j := 0; j < n; j++ {
......@@ -309,7 +312,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
case '0', '1', '2', '3', '4', '5', '6', '7':
v := rune(c) - '0'
if len(s) < 2 {
err = ErrSyntax
err = io.ErrUnexpectedEOF
return
}
for j := 0; j < 2; j++ { // one digit already; two more
......@@ -399,7 +402,8 @@ func Unquote(s string) (string, error) {
for len(s) > 0 {
c, multibyte, ss, err := UnquoteChar(s, quote)
if err != nil {
return "", err
// turn any error from UnquoteChar into syntax error
return "", ErrSyntax
}
s = ss
if c < utf8.RuneSelf || !multibyte {
......
......@@ -5,6 +5,7 @@
package strconv_test
import (
"io"
. "strconv"
"testing"
"unicode"
......@@ -305,6 +306,29 @@ var misquoted = []string{
"'\n'",
}
// unQuoteCharErrTest is a single test case for the error returned by
// UnquoteChar.
type unQuoteCharErrTest struct {
in string // input passed to UnquoteChar
err error // error UnquoteChar is expected to return for in; nil means success
}
// misquotedChars lists inputs used to check the error returned by
// UnquoteChar. TestUnquote calls UnquoteChar on each input with '"' as the
// quote and compares the returned error with err. For entries whose err is
// nil it additionally checks that every non-empty proper prefix of the input
// yields io.ErrUnexpectedEOF.
var misquotedChars = []unQuoteCharErrTest{
{`\000`, nil}, // nil means UnquoteChar should succeed on the full input;
{`\x00`, nil}, // truncated forms of it are then tested for io.ErrUnexpectedEOF
{`\u0000`, nil},
{`\U00000000`, nil},
{`"`, ErrSyntax},
{`\'`, ErrSyntax},
{`\q`, ErrSyntax},
{`\z`, ErrSyntax},
{`\008`, ErrSyntax},
{`\400`, ErrSyntax},
{`\x0z`, ErrSyntax},
{`\u000z`, ErrSyntax},
{`\U0000000z`, ErrSyntax},
{`\U12345678`, ErrSyntax},
}
func TestUnquote(t *testing.T) {
for _, tt := range unquotetests {
if out, err := Unquote(tt.in); err != nil || out != tt.out {
......@@ -324,6 +348,23 @@ func TestUnquote(t *testing.T) {
t.Errorf("Unquote(%#q) = %q, %v want %q, %v", s, out, err, "", ErrSyntax)
}
}
for _, tt := range misquotedChars {
_, _, _, err := UnquoteChar(tt.in, '"')
if err != tt.err {
t.Errorf("UnquoteChar(%#q) -> err = %v want %v", tt.in, err, tt.err)
}
if tt.err == nil {
// truncated valid input should result in unexpected EOF
for l := len(tt.in) - 1; l > 0; l-- {
_, _, _, err2 := UnquoteChar(tt.in[:l], '"')
if err2 != io.ErrUnexpectedEOF {
t.Errorf("UnquoteChar(%#q) -> err = %v want %v", tt.in[:l], err2, io.ErrUnexpectedEOF)
}
}
}
}
}
func BenchmarkUnquoteEasy(b *testing.B) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment