Commit fee7f2ab authored by Martin Möhrmann's avatar Martin Möhrmann

strings: speed up FieldsFunc

Increases performance of FieldsFunc by recording the start and end
of the fields in an array. The first 32 fields are saved in a pre-allocated
array on the stack. This avoids the old behavior of iterating over the
input string two times but uses more allocations when more than 32 fields
are encountered.

Additionally code for handling non-ASCII containing strings from Fields is
removed and replaced by a call to the new faster FieldsFunc function.

Overall this still leads to a slowdown for Fields on non-ASCII strings
while speeding up Fields in general.

name                      old time/op    new time/op     delta
Fields/ASCII/16              116ns ± 5%      115ns ± 5%       ~     (p=0.480 n=10+10)
Fields/ASCII/256             765ns ± 1%      761ns ± 2%       ~     (p=0.171 n=10+10)
Fields/ASCII/4096           12.5µs ± 1%     12.7µs ± 1%     +1.82%  (p=0.000 n=10+10)
Fields/ASCII/65536           226µs ± 1%      226µs ± 2%       ~     (p=0.739 n=10+10)
Fields/ASCII/1048576        5.12ms ± 1%     5.12ms ± 1%       ~     (p=0.696 n=8+10)
Fields/Mixed/16              172ns ± 1%      233ns ± 1%    +35.90%  (p=0.000 n=9+10)
Fields/Mixed/256            1.18µs ± 2%     2.45µs ± 1%   +107.47%  (p=0.000 n=10+10)
Fields/Mixed/4096           20.3µs ± 1%     43.1µs ± 2%   +112.41%  (p=0.000 n=10+10)
Fields/Mixed/65536           364µs ± 1%      704µs ± 1%    +93.56%  (p=0.000 n=9+10)
Fields/Mixed/1048576        7.07ms ± 2%    13.34ms ± 4%    +88.83%  (p=0.000 n=10+10)
FieldsFunc/ASCII/16          274ns ± 1%      188ns ± 3%    -31.44%  (p=0.000 n=10+10)
FieldsFunc/ASCII/256        3.69µs ± 1%     2.06µs ± 2%    -44.26%  (p=0.000 n=10+10)
FieldsFunc/ASCII/4096       59.9µs ± 1%     35.3µs ± 2%    -41.10%  (p=0.000 n=10+10)
FieldsFunc/ASCII/65536       958µs ± 1%      567µs ± 1%    -40.82%  (p=0.000 n=10+9)
FieldsFunc/ASCII/1048576    16.3ms ± 2%     11.0ms ± 3%    -32.52%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16          309ns ± 1%      213ns ± 0%    -30.98%  (p=0.000 n=10+6)
FieldsFunc/Mixed/256        3.83µs ± 1%     2.14µs ± 1%    -44.01%  (p=0.000 n=10+10)
FieldsFunc/Mixed/4096       66.2µs ± 2%     37.8µs ± 1%    -42.85%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536      1.09ms ± 1%     0.63ms ± 1%    -42.73%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576    18.6ms ± 3%     12.0ms ± 2%    -35.50%  (p=0.000 n=10+10)

Fixes #17856
Fixes #19789

Change-Id: I9f5a560e534566fd81963651f342c8f44cfb0469
Reviewed-on: https://go-review.googlesource.com/42810Reviewed-by: default avatarJoe Tsai <thebrokentoaster@gmail.com>
parent fc6b74ce
...@@ -358,66 +358,7 @@ func Fields(s string) []string { ...@@ -358,66 +358,7 @@ func Fields(s string) []string {
} }
// Some runes in the input string are not ASCII. // Some runes in the input string are not ASCII.
// Same general approach as in the ASCII path but return FieldsFunc(s, unicode.IsSpace)
// uses DecodeRuneInString and unicode.IsSpace if
// a non-ASCII rune needs to be decoded and checked
// if it corresponds to a space.
a := make([]string, 0, n)
i := 0
// Skip spaces in the front of the input.
for i < len(s) {
if c := s[i]; c < utf8.RuneSelf {
if asciiSpace[c] == 0 {
break
}
i++
} else {
r, w := utf8.DecodeRuneInString(s[i:])
if !unicode.IsSpace(r) {
break
}
i += w
}
}
fieldStart := i
for i < len(s) {
if c := s[i]; c < utf8.RuneSelf {
if asciiSpace[c] == 0 {
i++
continue
}
a = append(a, s[fieldStart:i])
i++
} else {
r, w := utf8.DecodeRuneInString(s[i:])
if !unicode.IsSpace(r) {
i += w
continue
}
a = append(a, s[fieldStart:i])
i += w
}
// Skip spaces in between fields.
for i < len(s) {
if c := s[i]; c < utf8.RuneSelf {
if asciiSpace[c] == 0 {
break
}
i++
} else {
r, w := utf8.DecodeRuneInString(s[i:])
if !unicode.IsSpace(r) {
break
}
i += w
}
}
fieldStart = i
}
if fieldStart < len(s) { // Last field might end at EOF.
a = append(a, s[fieldStart:])
}
return a
} }
// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
...@@ -426,35 +367,42 @@ func Fields(s string) []string { ...@@ -426,35 +367,42 @@ func Fields(s string) []string {
// FieldsFunc makes no guarantees about the order in which it calls f(c). // FieldsFunc makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFunc may crash. // If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s string, f func(rune) bool) []string { func FieldsFunc(s string, f func(rune) bool) []string {
// First count the fields. // A span is used to record a slice of s of the form s[start:end].
n := 0 // The start index is inclusive and the end index is exclusive.
inField := false type span struct {
for _, rune := range s { start int
wasInField := inField end int
inField = !f(rune)
if inField && !wasInField {
n++
}
} }
spans := make([]span, 0, 32)
// Now create them. // Find the field start and end indices.
a := make([]string, n) wasField := false
na := 0 fromIndex := 0
fieldStart := -1 // Set to -1 when looking for start of field.
for i, rune := range s { for i, rune := range s {
if f(rune) { if f(rune) {
if fieldStart >= 0 { if wasField {
a[na] = s[fieldStart:i] spans = append(spans, span{start: fromIndex, end: i})
na++ wasField = false
fieldStart = -1 }
} else {
if !wasField {
fromIndex = i
wasField = true
} }
} else if fieldStart == -1 {
fieldStart = i
} }
} }
if fieldStart >= 0 { // Last field might end at EOF.
a[na] = s[fieldStart:] // Last field might end at EOF.
if wasField {
spans = append(spans, span{fromIndex, len(s)})
}
// Create strings from recorded field indices.
a := make([]string, len(spans))
for i, span := range spans {
a[i] = s[span.start:span.end]
} }
return a return a
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment