Commit f2f3b8fa authored by Rob Pike

strconv: change Quote to be Unicode-friendly, add QuoteToASCII.

The Quote and QuoteRune functions now let printable
runes (as defined by unicode.IsPrint) through.  When
true 7-bit clean stuff is necessary, there are now two
new functions: QuoteToASCII and QuoteRuneToASCII.

Printf("%q") uses Quote. To get the old behavior, it
will now be necessary to say
        Printf("%s", strconv.QuoteToASCII(s))
but that should rarely be necessary.

R=golang-dev, gri, r
CC=golang-dev
https://golang.org/cl/4561061
parent 05348ab0
...@@ -132,15 +132,15 @@ var fmttests = []struct { ...@@ -132,15 +132,15 @@ var fmttests = []struct {
{"%q", `"`, `"\""`}, {"%q", `"`, `"\""`},
{"%q", "\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`}, {"%q", "\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`},
{"%q", "abc\xffdef", `"abc\xffdef"`}, {"%q", "abc\xffdef", `"abc\xffdef"`},
{"%q", "\u263a", `"\u263a"`}, {"%q", "\u263a", `"☺"`},
{"%q", "\U0010ffff", `"\U0010ffff"`}, {"%q", "\U0010ffff", `"\U0010ffff"`},
// escaped characters // escaped characters
{"%q", 'x', `'x'`}, {"%q", 'x', `'x'`},
{"%q", 0, `'\x00'`}, {"%q", 0, `'\x00'`},
{"%q", '\n', `'\n'`}, {"%q", '\n', `'\n'`},
{"%q", '\u1234', `'\u1234'`}, {"%q", '\u0e00', `'\u0e00'`}, // not a printable rune.
{"%q", '\U00012345', `'\U00012345'`}, {"%q", '\U000c2345', `'\U000c2345'`}, // not a printable rune.
{"%q", int64(0x7FFFFFFF), `%!q(int64=2147483647)`}, {"%q", int64(0x7FFFFFFF), `%!q(int64=2147483647)`},
{"%q", uint64(0xFFFFFFFF), `%!q(uint64=4294967295)`}, {"%q", uint64(0xFFFFFFFF), `%!q(uint64=4294967295)`},
{"%q", '"', `'"'`}, {"%q", '"', `'"'`},
...@@ -148,7 +148,7 @@ var fmttests = []struct { ...@@ -148,7 +148,7 @@ var fmttests = []struct {
// width // width
{"%5s", "abc", "  abc"}, {"%5s", "abc", "  abc"},
{"%2s", "\u263a", " \u263a"}, {"%2s", "\u263a", " ☺"},
{"%-5s", "abc", "abc  "}, {"%-5s", "abc", "abc  "},
{"%-8q", "abc", `"abc"   `}, {"%-8q", "abc", `"abc"   `},
{"%05s", "abc", "00abc"}, {"%05s", "abc", "00abc"},
...@@ -158,9 +158,9 @@ var fmttests = []struct { ...@@ -158,9 +158,9 @@ var fmttests = []struct {
{"%.5s", "日本語日本語", "日本語日本"}, {"%.5s", "日本語日本語", "日本語日本"},
{"%.5s", []byte("日本語日本語"), "日本語日本"}, {"%.5s", []byte("日本語日本語"), "日本語日本"},
{"%.5q", "abcdefghijklmnopqrstuvwxyz", `"abcde"`}, {"%.5q", "abcdefghijklmnopqrstuvwxyz", `"abcde"`},
{"%.3q", "日本語日本語", `"\u65e5\u672c\u8a9e"`}, {"%.3q", "日本語日本語", `"日本語"`},
{"%.3q", []byte("日本語日本語"), `"\u65e5\u672c\u8a9e"`}, {"%.3q", []byte("日本語日本語"), `"日本語"`},
{"%10.1q", "日本語日本語", `  "\u65e5"`}, {"%10.1q", "日本語日本語", `       "日"`},
// integers // integers
{"%d", 12345, "12345"}, {"%d", 12345, "12345"},
......
...@@ -652,7 +652,7 @@ var errors = []struct { ...@@ -652,7 +652,7 @@ var errors = []struct {
}{ }{
{"\a", token.ILLEGAL, 0, "illegal character '\\a'"}, {"\a", token.ILLEGAL, 0, "illegal character '\\a'"},
{`#`, token.ILLEGAL, 0, "illegal character '#'"}, {`#`, token.ILLEGAL, 0, "illegal character '#'"},
{`…`, token.ILLEGAL, 0, "illegal character '\\u2026'"}, {`…`, token.ILLEGAL, 0, "illegal character '…'"},
{`' '`, token.CHAR, 0, ""}, {`' '`, token.CHAR, 0, ""},
{`''`, token.CHAR, 0, "illegal character literal"}, {`''`, token.CHAR, 0, "illegal character literal"},
{`'\8'`, token.CHAR, 2, "unknown escape sequence"}, {`'\8'`, token.CHAR, 2, "unknown escape sequence"},
......
...@@ -14,56 +14,68 @@ import ( ...@@ -14,56 +14,68 @@ import (
const lowerhex = "0123456789abcdef" const lowerhex = "0123456789abcdef"
func quoteWith(s string, quote byte) string { func quoteWith(s string, quote byte, ASCIIonly bool) string {
var buf bytes.Buffer var buf bytes.Buffer
buf.WriteByte(quote) buf.WriteByte(quote)
for ; len(s) > 0; s = s[1:] { for width := 0; len(s) > 0; s = s[width:] {
switch c := s[0]; { rune := int(s[0])
case c == quote: width = 1
if rune >= utf8.RuneSelf {
rune, width = utf8.DecodeRuneInString(s)
}
if width == 1 && rune == utf8.RuneError {
goto printEscX
}
if rune == int(quote) || rune == '\\' { // always backslashed
buf.WriteByte('\\') buf.WriteByte('\\')
buf.WriteByte(quote) buf.WriteByte(byte(rune))
case c == '\\': continue
buf.WriteString(`\\`) }
case ' ' <= c && c <= '~': if ASCIIonly {
buf.WriteString(string(c)) if rune <= unicode.MaxASCII && unicode.IsPrint(rune) {
case c == '\a': buf.WriteRune(rune)
continue
}
} else if unicode.IsPrint(rune) {
buf.WriteRune(rune)
continue
}
switch rune {
case '\a':
buf.WriteString(`\a`) buf.WriteString(`\a`)
case c == '\b': case '\b':
buf.WriteString(`\b`) buf.WriteString(`\b`)
case c == '\f': case '\f':
buf.WriteString(`\f`) buf.WriteString(`\f`)
case c == '\n': case '\n':
buf.WriteString(`\n`) buf.WriteString(`\n`)
case c == '\r': case '\r':
buf.WriteString(`\r`) buf.WriteString(`\r`)
case c == '\t': case '\t':
buf.WriteString(`\t`) buf.WriteString(`\t`)
case c == '\v': case '\v':
buf.WriteString(`\v`) buf.WriteString(`\v`)
default:
case c >= utf8.RuneSelf && utf8.FullRuneInString(s): switch {
r, size := utf8.DecodeRuneInString(s) case rune < ' ':
if r == utf8.RuneError && size == 1 { printEscX:
goto EscX buf.WriteString(`\x`)
} buf.WriteByte(lowerhex[s[0]>>4])
s = s[size-1:] // next iteration will slice off 1 more buf.WriteByte(lowerhex[s[0]&0xF])
if r < 0x10000 { case rune > unicode.MaxRune:
rune = 0xFFFD
fallthrough
case rune < 0x10000:
buf.WriteString(`\u`) buf.WriteString(`\u`)
for j := uint(0); j < 4; j++ { for s := 12; s >= 0; s -= 4 {
buf.WriteByte(lowerhex[(r>>(12-4*j))&0xF]) buf.WriteByte(lowerhex[rune>>uint(s)&0xF])
} }
} else { default:
buf.WriteString(`\U`) buf.WriteString(`\U`)
for j := uint(0); j < 8; j++ { for s := 28; s >= 0; s -= 4 {
buf.WriteByte(lowerhex[(r>>(28-4*j))&0xF]) buf.WriteByte(lowerhex[rune>>uint(s)&0xF])
} }
} }
default:
EscX:
buf.WriteString(`\x`)
buf.WriteByte(lowerhex[c>>4])
buf.WriteByte(lowerhex[c&0xF])
} }
} }
buf.WriteByte(quote) buf.WriteByte(quote)
...@@ -71,21 +83,38 @@ func quoteWith(s string, quote byte) string { ...@@ -71,21 +83,38 @@ func quoteWith(s string, quote byte) string {
} }
// Quote returns a double-quoted Go string literal // Quote returns a double-quoted Go string literal representing s. The
// representing s. The returned string uses Go escape // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// sequences (\t, \n, \xFF, \u0100) for control characters // control characters and non-printable characters as defined by
// and non-ASCII characters. // unicode.IsPrint.
func Quote(s string) string { func Quote(s string) string {
return quoteWith(s, '"') return quoteWith(s, '"', false)
}
// QuoteToASCII returns a double-quoted Go string literal representing s.
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// non-ASCII characters and non-printable characters as defined by
// unicode.IsPrint.
func QuoteToASCII(s string) string {
return quoteWith(s, '"', true)
} }
// QuoteRune returns a single-quoted Go character literal // QuoteRune returns a single-quoted Go character literal representing the
// representing the rune. The returned string uses Go escape // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
// sequences (\t, \n, \xFF, \u0100) for control characters // for control characters and non-printable characters as defined by
// and non-ASCII characters. // unicode.IsPrint.
func QuoteRune(rune int) string { func QuoteRune(rune int) string {
// TODO: avoid the allocation here. // TODO: avoid the allocation here.
return quoteWith(string(rune), '\'') return quoteWith(string(rune), '\'', false)
}
// QuoteRuneToASCII returns a single-quoted Go character literal representing
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
// \u0100) for non-ASCII characters and non-printable characters as defined
// by unicode.IsPrint.
func QuoteRuneToASCII(rune int) string {
// TODO: avoid the allocation here.
return quoteWith(string(rune), '\'', true)
} }
// CanBackquote returns whether the string s would be // CanBackquote returns whether the string s would be
......
...@@ -11,17 +11,18 @@ import ( ...@@ -11,17 +11,18 @@ import (
) )
type quoteTest struct { type quoteTest struct {
in string in string
out string out string
ascii string
} }
var quotetests = []quoteTest{ var quotetests = []quoteTest{
{"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`}, {"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`},
{"\\", `"\\"`}, {"\\", `"\\"`, `"\\"`},
{"abc\xffdef", `"abc\xffdef"`}, {"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`},
{"\u263a", `"\u263a"`}, {"\u263a", `"☺"`, `"\u263a"`},
{"\U0010ffff", `"\U0010ffff"`}, {"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`},
{"\x04", `"\x04"`}, {"\x04", `"\x04"`, `"\x04"`},
} }
func TestQuote(t *testing.T) { func TestQuote(t *testing.T) {
...@@ -32,20 +33,30 @@ func TestQuote(t *testing.T) { ...@@ -32,20 +33,30 @@ func TestQuote(t *testing.T) {
} }
} }
func TestQuoteToASCII(t *testing.T) {
for _, tt := range quotetests {
if out := QuoteToASCII(tt.in); out != tt.ascii {
t.Errorf("QuoteToASCII(%s) = %s, want %s", tt.in, out, tt.ascii)
}
}
}
type quoteRuneTest struct { type quoteRuneTest struct {
in int in int
out string out string
ascii string
} }
var quoterunetests = []quoteRuneTest{ var quoterunetests = []quoteRuneTest{
{'a', `'a'`}, {'a', `'a'`, `'a'`},
{'\a', `'\a'`}, {'\a', `'\a'`, `'\a'`},
{'\\', `'\\'`}, {'\\', `'\\'`, `'\\'`},
{0xFF, `'\u00ff'`}, {0xFF, `'ÿ'`, `'\u00ff'`},
{0x263a, `'\u263a'`}, {0x263a, `'☺'`, `'\u263a'`},
{0x0010ffff, `'\U0010ffff'`}, {0xfffd, `'�'`, `'\ufffd'`},
{0x0010ffff + 1, `'\ufffd'`}, {0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`},
{0x04, `'\x04'`}, {0x0010ffff + 1, `'�'`, `'\ufffd'`},
{0x04, `'\x04'`, `'\x04'`},
} }
func TestQuoteRune(t *testing.T) { func TestQuoteRune(t *testing.T) {
...@@ -56,6 +67,14 @@ func TestQuoteRune(t *testing.T) { ...@@ -56,6 +67,14 @@ func TestQuoteRune(t *testing.T) {
} }
} }
func TestQuoteRuneToASCII(t *testing.T) {
for _, tt := range quoterunetests {
if out := QuoteRuneToASCII(tt.in); out != tt.ascii {
t.Errorf("QuoteRuneToASCII(%U) = %s, want %s", tt.in, out, tt.ascii)
}
}
}
type canBackquoteTest struct { type canBackquoteTest struct {
in string in string
out bool out bool
...@@ -110,7 +129,12 @@ func TestCanBackquote(t *testing.T) { ...@@ -110,7 +129,12 @@ func TestCanBackquote(t *testing.T) {
} }
} }
var unquotetests = []quoteTest{ type unQuoteTest struct {
in string
out string
}
var unquotetests = []unQuoteTest{
{`""`, ""}, {`""`, ""},
{`"a"`, "a"}, {`"a"`, "a"},
{`"abc"`, "abc"}, {`"abc"`, "abc"},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment