Commit a57f4275 authored by Kirill Smelkov

.

parent 9a2e24dc
...@@ -10,18 +10,21 @@ import ( ...@@ -10,18 +10,21 @@ import (
"lab.nexedi.com/kirr/go123/mem" "lab.nexedi.com/kirr/go123/mem"
) )
const hex = "0123456789abcdef"
// pyQuote quotes string the way python repr(str) would do // pyQuote quotes string the way python repr(str) would do
func pyQuote(s string) string { func pyQuote(s string) string {
out := pyQuoteBytes(mem.Bytes(s)) out := pyQuoteBytes(mem.Bytes(s))
return mem.String(out) return mem.String(out)
} }
const hex = "0123456789abcdef"
func pyQuoteBytes(b []byte) []byte { func pyQuoteBytes(b []byte) []byte {
buf := make([]byte, 0, (len(b) + 2) /* to reduce allocations when quoting */ * 2) buf := make([]byte, 0, (len(b) + 2) /* to reduce allocations when quoting */ * 2)
return pyAppendQuoteBytes(buf, b)
}
func pyAppendQuoteBytes(buf, b []byte) []byte {
// smartquotes: choose ' or " as quoting character // smartquotes: choose ' or " as quoting character
// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947 // https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947
quote := byte('\'') quote := byte('\'')
...@@ -58,16 +61,19 @@ func pyQuoteBytes(b []byte) []byte { ...@@ -58,16 +61,19 @@ func pyQuoteBytes(b []byte) []byte {
// we already converted to \<letter> what python represents as such above // we already converted to \<letter> what python represents as such above
buf = append(buf, '\\', 'x', hex[b[0]>>4], hex[b[0]&0xf]) buf = append(buf, '\\', 'x', hex[b[0]>>4], hex[b[0]&0xf])
case r < utf8.RuneSelf /* RuneSelf itself is not printable */ - 1:
// we already escaped all < RuneSelf runes
buf = append(buf, byte(r))
case strconv.IsPrint(r): case strconv.IsPrint(r):
// shortcut to avoid calling QuoteRune // printable utf-8 characters go as is
buf = append(buf, b[:size]...) buf = append(buf, b[:size]...)
default: default:
// we already handled ', " and (< ' ') above, so now it // everything else goes in numeric byte escapes
// should be safe to reuse strconv.QuoteRune for i := 0; i < size; i++ {
rq := strconv.QuoteRune(r) // "'\x01'" buf = append(buf, '\\', 'x', hex[b[i]>>4], hex[b[i]&0xf])
rq = rq[1:len(rq)-1] // "\x01" }
buf = append(buf, rq...)
} }
} }
......
...@@ -4,6 +4,8 @@ package main ...@@ -4,6 +4,8 @@ package main
import ( import (
"testing" "testing"
"lab.nexedi.com/kirr/go123/mem"
) )
// byterange returns []byte with element [start,stop) // byterange returns []byte with element [start,stop)
...@@ -39,21 +41,27 @@ var pyQuoteTestv = []struct {in, quoted string} { ...@@ -39,21 +41,27 @@ var pyQuoteTestv = []struct {in, quoted string} {
// invalid utf-8 // invalid utf-8
{"\xd0a", `'\xd0a'`}, {"\xd0a", `'\xd0a'`},
// non-printable utf-8
{"\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087", `'\x7f\xc2\x80\xc2\x81\xc2\x82\xc2\x83\xc2\x84\xc2\x85\xc2\x86\xc2\x87'`},
} }
func TestPyQuote(t *testing.T) { func TestPyQuote(t *testing.T) {
for _, tt := range pyQuoteTestv { for _, tt := range pyQuoteTestv {
quoted := pyQuote(tt.in) quoted := pyQuote(tt.in)
if quoted != tt.quoted { if quoted != tt.quoted {
t.Errorf("pyQuote(%q) -> %s ; want %s", tt.in, quoted, tt.quoted) t.Errorf("pyQuote(%q) ->\nhave: %s\nwant: %s", tt.in, quoted, tt.quoted)
} }
} }
} }
func BenchmarkPyQuote(b *testing.B) { func BenchmarkPyQuote(b *testing.B) {
buf := []byte{}
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
for _, tt := range pyQuoteTestv { for _, tt := range pyQuoteTestv {
pyQuote(tt.in) buf = buf[:0]
buf = pyAppendQuoteBytes(buf, mem.Bytes(tt.in))
} }
} }
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment