Commit 74288f09 authored by Rob Pike

strconv: add QuoteToGraphic and friends

This version of quoting allows runes in category Zs, such as the
ideographic space characters, to be passed through unquoted.

Still to do (maybe): A way to access this from Printf.

Updates #11511.

Change-Id: I3bae84b1aa0bc1b885318d3f67c5f451099a2a5a
Reviewed-on: https://go-review.googlesource.com/14184
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
parent 9ac0fff7
...@@ -635,3 +635,23 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry ...@@ -635,3 +635,23 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0xf57a, 0xf57a,
0xf5a4, 0xf5a4,
} }
// isGraphic lists the graphic runes not matched by IsPrint.
// Every entry fits in 16 bits, and the entries are kept in ascending order
// because isInGraphicList looks runes up with a binary search (bsearch16).
var isGraphic = []uint16{
0x00a0,
0x1680,
0x2000,
0x2001,
0x2002,
0x2003,
0x2004,
0x2005,
0x2006,
0x2007,
0x2008,
0x2009,
0x200a,
0x202f,
0x205f,
0x3000,
}
...@@ -174,6 +174,23 @@ func main() { ...@@ -174,6 +174,23 @@ func main() {
} }
fmt.Fprintf(&buf, "\t%#04x,\n", r-0x10000) fmt.Fprintf(&buf, "\t%#04x,\n", r-0x10000)
} }
fmt.Fprintf(&buf, "}\n\n")
// The list of graphic but not "printable" runes is short. Just make one easy table.
fmt.Fprintf(&buf, "// isGraphic lists the graphic runes not matched by IsPrint.\n")
fmt.Fprintf(&buf, "var isGraphic = []uint16{\n")
for r := rune(0); r <= unicode.MaxRune; r++ {
if unicode.IsPrint(r) != unicode.IsGraphic(r) {
// Sanity check.
if !unicode.IsGraphic(r) {
log.Fatalf("%U is printable but not graphic\n", r)
}
if r > 0xFFFF { // We expect only 16-bit values.
log.Fatalf("%U too big for isGraphic\n", r)
}
fmt.Fprintf(&buf, "\t%#04x,\n", r)
}
}
fmt.Fprintf(&buf, "}\n") fmt.Fprintf(&buf, "}\n")
data, err := format.Source(buf.Bytes()) data, err := format.Source(buf.Bytes())
......
...@@ -12,7 +12,7 @@ import ( ...@@ -12,7 +12,7 @@ import (
const lowerhex = "0123456789abcdef" const lowerhex = "0123456789abcdef"
func quoteWith(s string, quote byte, ASCIIonly bool) string { func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
var runeTmp [utf8.UTFMax]byte var runeTmp [utf8.UTFMax]byte
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
buf = append(buf, quote) buf = append(buf, quote)
...@@ -38,7 +38,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string { ...@@ -38,7 +38,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
buf = append(buf, byte(r)) buf = append(buf, byte(r))
continue continue
} }
} else if IsPrint(r) { } else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
n := utf8.EncodeRune(runeTmp[:], r) n := utf8.EncodeRune(runeTmp[:], r)
buf = append(buf, runeTmp[:n]...) buf = append(buf, runeTmp[:n]...)
continue continue
...@@ -90,7 +90,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string { ...@@ -90,7 +90,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
// control characters and non-printable characters as defined by // control characters and non-printable characters as defined by
// IsPrint. // IsPrint.
func Quote(s string) string { func Quote(s string) string {
return quoteWith(s, '"', false) return quoteWith(s, '"', false, false)
} }
// AppendQuote appends a double-quoted Go string literal representing s, // AppendQuote appends a double-quoted Go string literal representing s,
...@@ -103,7 +103,7 @@ func AppendQuote(dst []byte, s string) []byte { ...@@ -103,7 +103,7 @@ func AppendQuote(dst []byte, s string) []byte {
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// non-ASCII characters and non-printable characters as defined by IsPrint. // non-ASCII characters and non-printable characters as defined by IsPrint.
func QuoteToASCII(s string) string { func QuoteToASCII(s string) string {
return quoteWith(s, '"', true) return quoteWith(s, '"', true, false)
} }
// AppendQuoteToASCII appends a double-quoted Go string literal representing s, // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
...@@ -112,12 +112,25 @@ func AppendQuoteToASCII(dst []byte, s string) []byte { ...@@ -112,12 +112,25 @@ func AppendQuoteToASCII(dst []byte, s string) []byte {
return append(dst, QuoteToASCII(s)...) return append(dst, QuoteToASCII(s)...)
} }
// QuoteToGraphic returns a double-quoted Go string literal representing s.
// The returned string leaves Unicode graphic characters, as defined by
// IsGraphic, unchanged and uses Go escape sequences (\t, \n, \xFF, \u0100)
// for non-graphic characters. Unlike QuoteToASCII, non-ASCII characters are
// not escaped merely for being non-ASCII.
func QuoteToGraphic(s string) string {
// ASCIIonly=false, graphicOnly=true: runes in the isGraphic list pass through unescaped.
return quoteWith(s, '"', false, true)
}
// AppendQuoteToGraphic appends a double-quoted Go string literal representing s,
// as generated by QuoteToGraphic, to dst and returns the extended buffer.
// The result is the contents of dst followed by QuoteToGraphic(s).
func AppendQuoteToGraphic(dst []byte, s string) []byte {
return append(dst, QuoteToGraphic(s)...)
}
// QuoteRune returns a single-quoted Go character literal representing the // QuoteRune returns a single-quoted Go character literal representing the
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
// for control characters and non-printable characters as defined by IsPrint. // for control characters and non-printable characters as defined by IsPrint.
func QuoteRune(r rune) string { func QuoteRune(r rune) string {
// TODO: avoid the allocation here. // TODO: avoid the allocation here.
return quoteWith(string(r), '\'', false) return quoteWith(string(r), '\'', false, false)
} }
// AppendQuoteRune appends a single-quoted Go character literal representing the rune, // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
...@@ -127,12 +140,12 @@ func AppendQuoteRune(dst []byte, r rune) []byte { ...@@ -127,12 +140,12 @@ func AppendQuoteRune(dst []byte, r rune) []byte {
} }
// QuoteRuneToASCII returns a single-quoted Go character literal representing // QuoteRuneToASCII returns a single-quoted Go character literal representing
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF, // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
// \u0100) for non-ASCII characters and non-printable characters as defined // \u0100) for non-ASCII characters and non-printable characters as defined
// by IsPrint. // by IsPrint.
func QuoteRuneToASCII(r rune) string { func QuoteRuneToASCII(r rune) string {
// TODO: avoid the allocation here. // TODO: avoid the allocation here.
return quoteWith(string(r), '\'', true) return quoteWith(string(r), '\'', true, false)
} }
// AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
...@@ -141,6 +154,21 @@ func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { ...@@ -141,6 +154,21 @@ func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
return append(dst, QuoteRuneToASCII(r)...) return append(dst, QuoteRuneToASCII(r)...)
} }
// QuoteRuneToGraphic returns a single-quoted Go character literal representing
// the rune. Unicode graphic characters, as defined by IsGraphic, are left
// unchanged, including non-ASCII ones; the returned string uses a Go escape
// sequence (\t, \n, \xFF, \u0100) for non-graphic characters.
func QuoteRuneToGraphic(r rune) string {
// TODO: avoid the allocation here.
// ASCIIonly=false, graphicOnly=true: runes in the isGraphic list pass through unescaped.
return quoteWith(string(r), '\'', false, true)
}
// AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,
// as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.
// The result is the contents of dst followed by QuoteRuneToGraphic(r).
func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
return append(dst, QuoteRuneToGraphic(r)...)
}
// CanBackquote reports whether the string s can be represented // CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control // unchanged as a single-line backquoted string without control
// characters other than tab. // characters other than tab.
...@@ -453,3 +481,26 @@ func IsPrint(r rune) bool { ...@@ -453,3 +481,26 @@ func IsPrint(r rune) bool {
j := bsearch16(isNotPrint, uint16(r)) j := bsearch16(isNotPrint, uint16(r))
return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
} }
// IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such
// characters include letters, marks, numbers, punctuation, symbols, and
// spaces, from categories L, M, N, P, S, and Zs.
func IsGraphic(r rune) bool {
	// Printable runes are graphic by definition; the short isGraphic list is
	// consulted only for runes that IsPrint rejects.
	return IsPrint(r) || isInGraphicList(r)
}
// isInGraphicList reports whether the rune is in the isGraphic list. This separation
// from IsGraphic allows quoteWith to avoid two calls to IsPrint.
// Should be called only if IsPrint fails.
func isInGraphicList(r rune) bool {
	// Every entry of isGraphic fits in 16 bits - see makeisprint.go - so a rune
	// outside [0, 0xFFFF] can never be listed. The lower bound matters because
	// rune is signed: without it, uint16(r) would truncate a negative value
	// such as 0x00A0-0x10000 onto a real table entry and report a false match.
	if r < 0 || r > 0xFFFF {
		return false
	}
	rr := uint16(r)
	// bsearch16 returns the index of the first entry >= rr (len(isGraphic) if none).
	i := bsearch16(isGraphic, rr)
	return i < len(isGraphic) && rr == isGraphic[i]
}
...@@ -10,7 +10,7 @@ import ( ...@@ -10,7 +10,7 @@ import (
"unicode" "unicode"
) )
// Verify that our isPrint agrees with unicode.IsPrint // Verify that our IsPrint agrees with unicode.IsPrint.
func TestIsPrint(t *testing.T) { func TestIsPrint(t *testing.T) {
n := 0 n := 0
for r := rune(0); r <= unicode.MaxRune; r++ { for r := rune(0); r <= unicode.MaxRune; r++ {
...@@ -24,19 +24,36 @@ func TestIsPrint(t *testing.T) { ...@@ -24,19 +24,36 @@ func TestIsPrint(t *testing.T) {
} }
} }
// TestIsGraphic checks every rune up to unicode.MaxRune and verifies that this
// package's IsGraphic agrees with unicode.IsGraphic. It gives up once more
// than ten mismatches have been reported.
func TestIsGraphic(t *testing.T) {
	mismatches := 0
	for r := rune(0); r <= unicode.MaxRune; r++ {
		got := IsGraphic(r)
		if got == unicode.IsGraphic(r) {
			continue
		}
		t.Errorf("IsGraphic(%U)=%t incorrect", r, got)
		mismatches++
		if mismatches > 10 {
			return
		}
	}
}
type quoteTest struct { type quoteTest struct {
in string in string
out string out string
ascii string ascii string
graphic string
} }
var quotetests = []quoteTest{ var quotetests = []quoteTest{
{"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`}, {"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`},
{"\\", `"\\"`, `"\\"`}, {"\\", `"\\"`, `"\\"`, `"\\"`},
{"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`}, {"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`, `"abc\xffdef"`},
{"\u263a", `"☺"`, `"\u263a"`}, {"\u263a", `"☺"`, `"\u263a"`, `"☺"`},
{"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`}, {"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`, `"\U0010ffff"`},
{"\x04", `"\x04"`, `"\x04"`}, {"\x04", `"\x04"`, `"\x04"`, `"\x04"`},
// Some non-printable but graphic runes. Final column is double-quoted.
{"!\u00a0!\u2000!\u3000!", `"!\u00a0!\u2000!\u3000!"`, `"!\u00a0!\u2000!\u3000!"`, "\"!\u00a0!\u2000!\u3000!\""},
} }
func TestQuote(t *testing.T) { func TestQuote(t *testing.T) {
...@@ -61,22 +78,38 @@ func TestQuoteToASCII(t *testing.T) { ...@@ -61,22 +78,38 @@ func TestQuoteToASCII(t *testing.T) {
} }
} }
// TestQuoteToGraphic runs QuoteToGraphic and AppendQuoteToGraphic over the
// shared quotetests table and compares the results with the graphic column.
func TestQuoteToGraphic(t *testing.T) {
	const prefix = "abc"
	for _, tc := range quotetests {
		want := tc.graphic
		if got := QuoteToGraphic(tc.in); got != want {
			t.Errorf("QuoteToGraphic(%s) = %s, want %s", tc.in, got, want)
		}
		appended := string(AppendQuoteToGraphic([]byte(prefix), tc.in))
		if wantAppended := prefix + want; appended != wantAppended {
			t.Errorf("AppendQuoteToGraphic(%q, %s) = %s, want %s", prefix, tc.in, appended, wantAppended)
		}
	}
}
type quoteRuneTest struct { type quoteRuneTest struct {
in rune in rune
out string out string
ascii string ascii string
graphic string
} }
var quoterunetests = []quoteRuneTest{ var quoterunetests = []quoteRuneTest{
{'a', `'a'`, `'a'`}, {'a', `'a'`, `'a'`, `'a'`},
{'\a', `'\a'`, `'\a'`}, {'\a', `'\a'`, `'\a'`, `'\a'`},
{'\\', `'\\'`, `'\\'`}, {'\\', `'\\'`, `'\\'`, `'\\'`},
{0xFF, `'ÿ'`, `'\u00ff'`}, {0xFF, `'ÿ'`, `'\u00ff'`, `'ÿ'`},
{0x263a, `'☺'`, `'\u263a'`}, {0x263a, `'☺'`, `'\u263a'`, `'☺'`},
{0xfffd, `'�'`, `'\ufffd'`}, {0xfffd, `'�'`, `'\ufffd'`, `'�'`},
{0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`}, {0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`, `'\U0010ffff'`},
{0x0010ffff + 1, `'�'`, `'\ufffd'`}, {0x0010ffff + 1, `'�'`, `'\ufffd'`, `'�'`},
{0x04, `'\x04'`, `'\x04'`}, {0x04, `'\x04'`, `'\x04'`, `'\x04'`},
// Some differences between graphic and printable. Note the last column is double-quoted.
{'\u00a0', `'\u00a0'`, `'\u00a0'`, "'\u00a0'"},
{'\u2000', `'\u2000'`, `'\u2000'`, "'\u2000'"},
{'\u3000', `'\u3000'`, `'\u3000'`, "'\u3000'"},
} }
func TestQuoteRune(t *testing.T) { func TestQuoteRune(t *testing.T) {
...@@ -101,6 +134,17 @@ func TestQuoteRuneToASCII(t *testing.T) { ...@@ -101,6 +134,17 @@ func TestQuoteRuneToASCII(t *testing.T) {
} }
} }
// TestQuoteRuneToGraphic runs QuoteRuneToGraphic and AppendQuoteRuneToGraphic
// over the shared quoterunetests table and compares the results with the
// graphic column.
func TestQuoteRuneToGraphic(t *testing.T) {
	const prefix = "abc"
	for _, tc := range quoterunetests {
		want := tc.graphic
		if got := QuoteRuneToGraphic(tc.in); got != want {
			t.Errorf("QuoteRuneToGraphic(%U) = %s, want %s", tc.in, got, want)
		}
		appended := string(AppendQuoteRuneToGraphic([]byte(prefix), tc.in))
		if wantAppended := prefix + want; appended != wantAppended {
			t.Errorf("AppendQuoteRuneToGraphic(%q, %U) = %s, want %s", prefix, tc.in, appended, wantAppended)
		}
	}
}
type canBackquoteTest struct { type canBackquoteTest struct {
in string in string
out bool out bool
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment