Commit bba7396f authored by Brad Fitzpatrick

strings: implement a faster byte->string Replacer

This implements a replacer for when all old strings are single
bytes, but new values are not.

BenchmarkHTMLEscapeNew   1000000   1090 ns/op
BenchmarkHTMLEscapeOld   1000000   2049 ns/op

R=rsc
CC=golang-dev
https://golang.org/cl/5176043
parent e419535f
...@@ -47,6 +47,8 @@ func (h Header) Write(w io.Writer) os.Error { ...@@ -47,6 +47,8 @@ func (h Header) Write(w io.Writer) os.Error {
return h.WriteSubset(w, nil) return h.WriteSubset(w, nil)
} }
// headerNewlineToSpace replaces every "\n" and "\r" with a single space.
// WriteSubset applies it to header values before writing them out.
var headerNewlineToSpace = strings.NewReplacer("\n", " ", "\r", " ")
// WriteSubset writes a header in wire format. // WriteSubset writes a header in wire format.
// If exclude is not nil, keys where exclude[key] == true are not written. // If exclude is not nil, keys where exclude[key] == true are not written.
func (h Header) WriteSubset(w io.Writer, exclude map[string]bool) os.Error { func (h Header) WriteSubset(w io.Writer, exclude map[string]bool) os.Error {
...@@ -59,8 +61,7 @@ func (h Header) WriteSubset(w io.Writer, exclude map[string]bool) os.Error { ...@@ -59,8 +61,7 @@ func (h Header) WriteSubset(w io.Writer, exclude map[string]bool) os.Error {
sort.Strings(keys) sort.Strings(keys)
for _, k := range keys { for _, k := range keys {
for _, v := range h[k] { for _, v := range h[k] {
v = strings.Replace(v, "\n", " ", -1) v = headerNewlineToSpace.Replace(v)
v = strings.Replace(v, "\r", " ", -1)
v = strings.TrimSpace(v) v = strings.TrimSpace(v)
if _, err := fmt.Fprintf(w, "%s: %s\r\n", k, v); err != nil { if _, err := fmt.Fprintf(w, "%s: %s\r\n", k, v); err != nil {
return err return err
......
...@@ -752,13 +752,16 @@ func Redirect(w ResponseWriter, r *Request, urlStr string, code int) { ...@@ -752,13 +752,16 @@ func Redirect(w ResponseWriter, r *Request, urlStr string, code int) {
} }
} }
// htmlReplacer rewrites the characters &, <, >, " and ' to their HTML
// entity forms in a single pass. It backs htmlEscape.
//
// The ampersand must map to "&amp;"; mapping it to itself would leave
// raw ampersands in the escaped output.
var htmlReplacer = strings.NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&apos;",
)
func htmlEscape(s string) string { func htmlEscape(s string) string {
s = strings.Replace(s, "&", "&amp;", -1) return htmlReplacer.Replace(s)
s = strings.Replace(s, "<", "&lt;", -1)
s = strings.Replace(s, ">", "&gt;", -1)
s = strings.Replace(s, "\"", "&quot;", -1)
s = strings.Replace(s, "'", "&apos;", -1)
return s
} }
// Redirect to a fixed URL // Redirect to a fixed URL
......
...@@ -85,10 +85,10 @@ func (w *Writer) CreatePart(header textproto.MIMEHeader) (io.Writer, os.Error) { ...@@ -85,10 +85,10 @@ func (w *Writer) CreatePart(header textproto.MIMEHeader) (io.Writer, os.Error) {
return p, nil return p, nil
} }
// quoteEscaper backslash-escapes backslashes and double quotes:
// `\` becomes `\\` and `"` becomes `\"`. It backs escapeQuotes.
var quoteEscaper = strings.NewReplacer("\\", "\\\\", `"`, "\\\"")
func escapeQuotes(s string) string { func escapeQuotes(s string) string {
s = strings.Replace(s, "\\", "\\\\", -1) return quoteEscaper.Replace(s)
s = strings.Replace(s, "\"", "\\\"", -1)
return s
} }
// CreateFormFile is a convenience wrapper around CreatePart. It creates // CreateFormFile is a convenience wrapper around CreatePart. It creates
......
...@@ -36,8 +36,12 @@ func NewReplacer(oldnew ...string) *Replacer { ...@@ -36,8 +36,12 @@ func NewReplacer(oldnew ...string) *Replacer {
panic("strings.NewReplacer: odd argument count") panic("strings.NewReplacer: odd argument count")
} }
var bb byteReplacer // Possible implementations.
var gen genericReplacer var (
bb byteReplacer
bs byteStringReplacer
gen genericReplacer
)
allOldBytes, allNewBytes := true, true allOldBytes, allNewBytes := true, true
for len(oldnew) > 0 { for len(oldnew) > 0 {
...@@ -49,7 +53,17 @@ func NewReplacer(oldnew ...string) *Replacer { ...@@ -49,7 +53,17 @@ func NewReplacer(oldnew ...string) *Replacer {
if len(new) != 1 { if len(new) != 1 {
allNewBytes = false allNewBytes = false
} }
// generic
gen.p = append(gen.p, pair{old, new}) gen.p = append(gen.p, pair{old, new})
// byte -> string
if allOldBytes {
bs.old.set(old[0])
bs.new[old[0]] = []byte(new)
}
// byte -> byte
if allOldBytes && allNewBytes { if allOldBytes && allNewBytes {
bb.old.set(old[0]) bb.old.set(old[0])
bb.new[old[0]] = new[0] bb.new[old[0]] = new[0]
...@@ -59,6 +73,9 @@ func NewReplacer(oldnew ...string) *Replacer { ...@@ -59,6 +73,9 @@ func NewReplacer(oldnew ...string) *Replacer {
if allOldBytes && allNewBytes { if allOldBytes && allNewBytes {
return &Replacer{r: &bb} return &Replacer{r: &bb}
} }
if allOldBytes {
return &Replacer{r: &bs}
}
return &Replacer{r: &gen} return &Replacer{r: &gen}
} }
...@@ -176,6 +193,7 @@ func (r *byteReplacer) Replace(s string) string { ...@@ -176,6 +193,7 @@ func (r *byteReplacer) Replace(s string) string {
} }
func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) {
// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
bufsize := 32 << 10 bufsize := 32 << 10
if len(s) < bufsize { if len(s) < bufsize {
bufsize = len(s) bufsize = len(s)
...@@ -199,6 +217,94 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) ...@@ -199,6 +217,94 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error)
return n, nil return n, nil
} }
// byteStringReplacer is the implementation that's used when all the
// "old" values are single bytes but the "new" values vary in size.
type byteStringReplacer struct {
	// old has a bit set for each old byte that should be replaced.
	// Byte b is tested with old[b>>5] & (1 << (b&31)).
	old byteBitmap

	// new holds the replacement bytes, indexed by old byte. An entry is
	// only valid if the corresponding bit in old is set.
	new [256][]byte
}
// Replace returns a copy of s in which every byte marked in r.old has
// been substituted by its entry in r.new. If no byte of s is marked,
// s itself is returned and nothing is allocated.
func (r *byteStringReplacer) Replace(s string) string {
	// First pass: work out the exact output length and whether any
	// byte needs replacing at all.
	size := len(s)
	changed := false
	for i := 0; i < len(s); i++ {
		c := s[i]
		if r.old[c>>5]&uint32(1<<(c&31)) == 0 {
			continue
		}
		changed = true
		// The replacement takes the place of the one original byte.
		size += len(r.new[c]) - 1
	}
	if !changed {
		return s
	}

	// Second pass: assemble the output into a buffer of exactly the
	// computed capacity, so append never has to grow it.
	out := make([]byte, 0, size)
	for i := 0; i < len(s); i++ {
		c := s[i]
		if r.old[c>>5]&uint32(1<<(c&31)) != 0 {
			out = append(out, r.new[c]...)
			continue
		}
		out = append(out, c)
	}
	return string(out)
}
// WriteString writes s to w with every byte marked in r.old replaced by
// its entry in r.new. It returns the total number of bytes written and
// the first error reported by w, if any.
//
// Bytes that are not replaced are collected in one scratch buffer of at
// most 32KB. The buffer is flushed to w when it becomes full or when a
// byte with a non-empty replacement is reached; the replacement is then
// written to w directly.
func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) {
	// TODO(bradfitz): use io.WriteString with slices of s instead.
	limit := len(s)
	if limit > 32<<10 {
		limit = 32 << 10
	}
	scratch := make([]byte, limit)
	pending := scratch[:0]
	for i := 0; i < len(s); i++ {
		c := s[i]
		var repl []byte
		if r.old[c>>5]&uint32(1<<(c&31)) == 0 {
			pending = append(pending, c)
		} else {
			repl = r.new[c]
		}
		hasRepl := len(repl) > 0
		// Flush when the buffer is full, or when buffered bytes must
		// reach w ahead of the replacement that follows them.
		if len(pending) == cap(pending) || (hasRepl && len(pending) > 0) {
			nw, err := w.Write(pending)
			n += nw
			if err != nil {
				return n, err
			}
			pending = scratch[:0]
		}
		if !hasRepl {
			continue
		}
		nw, err := w.Write(repl)
		n += nw
		if err != nil {
			return n, err
		}
	}
	// Write out whatever is still buffered.
	if len(pending) > 0 {
		nw, err := w.Write(pending)
		n += nw
		if err != nil {
			return n, err
		}
	}
	return n, nil
}
// strings is too low-level to import io/ioutil // strings is too low-level to import io/ioutil
var discard io.Writer = devNull(0) var discard io.Writer = devNull(0)
......
...@@ -41,14 +41,21 @@ var capitalLetters = NewReplacer("a", "A", "b", "B") ...@@ -41,14 +41,21 @@ var capitalLetters = NewReplacer("a", "A", "b", "B")
var blankToXReplacer = NewReplacer("", "X", "o", "O") var blankToXReplacer = NewReplacer("", "X", "o", "O")
var ReplacerTests = []ReplacerTest{ var ReplacerTests = []ReplacerTest{
// byte->string
{htmlEscaper, "No changes", "No changes"}, {htmlEscaper, "No changes", "No changes"},
{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"}, {htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
{htmlEscaper, "&&&", "&amp;&amp;&amp;"}, {htmlEscaper, "&&&", "&amp;&amp;&amp;"},
// generic
{replacer, "fooaaabar", "foo3[aaa]b1[a]r"}, {replacer, "fooaaabar", "foo3[aaa]b1[a]r"},
{replacer, "long, longerst, longer", "short, most long, medium"}, {replacer, "long, longerst, longer", "short, most long, medium"},
{replacer, "XiX", "YiY"}, {replacer, "XiX", "YiY"},
// byte->byte
{capitalLetters, "brad", "BrAd"}, {capitalLetters, "brad", "BrAd"},
{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, {capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
// hitting "" special case
{blankToXReplacer, "oo", "XOXOX"}, {blankToXReplacer, "oo", "XOXOX"},
} }
...@@ -84,7 +91,9 @@ type pickAlgorithmTest struct { ...@@ -84,7 +91,9 @@ type pickAlgorithmTest struct {
var pickAlgorithmTests = []pickAlgorithmTest{ var pickAlgorithmTests = []pickAlgorithmTest{
{capitalLetters, "*strings.byteReplacer"}, {capitalLetters, "*strings.byteReplacer"},
{NewReplacer("a", "A", "b", "Bb"), "*strings.genericReplacer"}, {NewReplacer("12", "123"), "*strings.genericReplacer"},
{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
{htmlEscaper, "*strings.byteStringReplacer"},
} }
func TestPickAlgorithm(t *testing.T) { func TestPickAlgorithm(t *testing.T) {
...@@ -118,6 +127,27 @@ func BenchmarkByteByteMatch(b *testing.B) { ...@@ -118,6 +127,27 @@ func BenchmarkByteByteMatch(b *testing.B) {
} }
} }
// BenchmarkByteStringMatch times htmlEscaper.Replace on an input that
// starts with "<" and ends with ">", with a run of "a"s and a run of
// "b"s in between.
func BenchmarkByteStringMatch(b *testing.B) {
	input := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
	for n := b.N; n > 0; n-- {
		htmlEscaper.Replace(input)
	}
}
// BenchmarkHTMLEscapeNew times htmlEscaper.Replace on a short sentence
// containing "<" and "&". BenchmarkHTMLEscapeOld runs the same input
// through oldhtmlEscape for comparison.
func BenchmarkHTMLEscapeNew(b *testing.B) {
	const input = "I <3 to escape HTML & other text too."
	for n := b.N; n > 0; n-- {
		htmlEscaper.Replace(input)
	}
}
// BenchmarkHTMLEscapeOld times oldhtmlEscape on the same sentence used
// by BenchmarkHTMLEscapeNew, giving the baseline for that benchmark.
func BenchmarkHTMLEscapeOld(b *testing.B) {
	const input = "I <3 to escape HTML & other text too."
	for n := b.N; n > 0; n-- {
		oldhtmlEscape(input)
	}
}
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
func BenchmarkByteByteReplaces(b *testing.B) { func BenchmarkByteByteReplaces(b *testing.B) {
str := Repeat("a", 100) + Repeat("b", 100) str := Repeat("a", 100) + Repeat("b", 100)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment