Commit 4b4f222a authored by Ben Hoyt's avatar Ben Hoyt Committed by Brad Fitzpatrick

bytes, strings: speed up TrimSpace 4-5x for common ASCII cases

This change adds a fast path for ASCII strings to both
strings.TrimSpace and bytes.TrimSpace. It doesn't slow down the
non-ASCII path much, if at all.

I added benchmarks for strings.TrimSpace as it didn't have any, and
I fleshed out the benchmarks for bytes.TrimSpace as it just had one
case (for ASCII). The benchmarks (and the code!) are now the same
between the two versions. Below are the benchmark results:

strings.TrimSpace:

name                      old time/op  new time/op  delta
TrimSpace/NoTrim-8        18.6ns ± 0%   3.8ns ± 0%  -79.53%  (p=0.000 n=5+4)
TrimSpace/ASCII-8         33.5ns ± 2%   6.0ns ± 3%  -82.05%  (p=0.008 n=5+5)
TrimSpace/SomeNonASCII-8  97.1ns ± 1%  88.6ns ± 1%   -8.68%  (p=0.008 n=5+5)
TrimSpace/JustNonASCII-8   144ns ± 0%   143ns ± 0%     ~     (p=0.079 n=4+5)

bytes.TrimSpace:

name                      old time/op  new time/op  delta
TrimSpace/NoTrim-8        18.9ns ± 1%   4.1ns ± 1%  -78.34%  (p=0.008 n=5+5)
TrimSpace/ASCII-8         29.9ns ± 0%   6.3ns ± 1%  -79.06%  (p=0.008 n=5+5)
TrimSpace/SomeNonASCII-8  91.5ns ± 0%  82.3ns ± 0%  -10.03%  (p=0.008 n=5+5)
TrimSpace/JustNonASCII-8   150ns ± 0%   150ns ± 0%     ~     (all equal)

Fixes #29122

Change-Id: Ica45cd86a219cadf60173ec9db260133cd1d7951
Reviewed-on: https://go-review.googlesource.com/c/go/+/152917Reviewed-by: default avatarDaniel Martí <mvdan@mvdan.cc>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 10aede26
......@@ -759,7 +759,36 @@ func TrimRight(s []byte, cutset string) []byte {
// TrimSpace returns a subslice of s by slicing off all leading and
// trailing white space, as defined by Unicode.
func TrimSpace(s []byte) []byte {
return TrimFunc(s, unicode.IsSpace)
// Fast path for ASCII: look for the first ASCII non-space byte
start := 0
for ; start < len(s); start++ {
c := s[start]
if c >= utf8.RuneSelf {
// If we run into a non-ASCII byte, fall back to the
// slower unicode-aware method on the remaining bytes
return TrimFunc(s[start:], unicode.IsSpace)
}
if asciiSpace[c] == 0 {
break
}
}
// Now look for the first ASCII non-space byte from the end
stop := len(s)
for ; stop > start; stop-- {
c := s[stop-1]
if c >= utf8.RuneSelf {
return TrimFunc(s[start:stop], unicode.IsSpace)
}
if asciiSpace[c] == 0 {
break
}
}
// At this point s[start:stop] starts and ends with an ASCII
// non-space bytes, so we're done. Non-ASCII cases have already
// been handled above.
return s[start:stop]
}
// Runes interprets s as a sequence of UTF-8-encoded code points.
......
......@@ -1617,9 +1617,21 @@ func BenchmarkFieldsFunc(b *testing.B) {
}
func BenchmarkTrimSpace(b *testing.B) {
s := []byte(" Some text. \n")
for i := 0; i < b.N; i++ {
TrimSpace(s)
tests := []struct {
name string
input []byte
}{
{"NoTrim", []byte("typical")},
{"ASCII", []byte(" foo bar ")},
{"SomeNonASCII", []byte(" \u2000\t\r\n x\t\t\r\r\ny\n \u3000 ")},
{"JustNonASCII", []byte("\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000")},
}
for _, test := range tests {
b.Run(test.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
TrimSpace(test.input)
}
})
}
}
......
......@@ -818,7 +818,36 @@ func TrimRight(s string, cutset string) string {
// TrimSpace returns a slice of the string s, with all leading
// and trailing white space removed, as defined by Unicode.
func TrimSpace(s string) string {
return TrimFunc(s, unicode.IsSpace)
// Fast path for ASCII: look for the first ASCII non-space byte
start := 0
for ; start < len(s); start++ {
c := s[start]
if c >= utf8.RuneSelf {
// If we run into a non-ASCII byte, fall back to the
// slower unicode-aware method on the remaining bytes
return TrimFunc(s[start:], unicode.IsSpace)
}
if asciiSpace[c] == 0 {
break
}
}
// Now look for the first ASCII non-space byte from the end
stop := len(s)
for ; stop > start; stop-- {
c := s[stop-1]
if c >= utf8.RuneSelf {
return TrimFunc(s[start:stop], unicode.IsSpace)
}
if asciiSpace[c] == 0 {
break
}
}
// At this point s[start:stop] starts and ends with an ASCII
// non-space bytes, so we're done. Non-ASCII cases have already
// been handled above.
return s[start:stop]
}
// TrimPrefix returns s without the provided leading prefix string.
......
......@@ -1731,3 +1731,19 @@ func BenchmarkJoin(b *testing.B) {
})
}
}
func BenchmarkTrimSpace(b *testing.B) {
tests := []struct{ name, input string }{
{"NoTrim", "typical"},
{"ASCII", " foo bar "},
{"SomeNonASCII", " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
{"JustNonASCII", "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
}
for _, test := range tests {
b.Run(test.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
TrimSpace(test.input)
}
})
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment