Commit 438818d9 authored by Ilya Tocar's avatar Ilya Tocar Committed by Brad Fitzpatrick

bytes: use Index in Count

Similar to https://go-review.googlesource.com/28586,
but for package bytes instead of strings.
This provides simpler code and some performance gain.
Also update strings.Count to use the same code.

On AMD64 with heavily optimized Index I see:

name             old time/op    new time/op     delta
Count/10-6         47.3ns ± 0%     36.8ns ± 0%    -22.35%  (p=0.000 n=10+10)
Count/32-6          286ns ± 0%       38ns ± 0%    -86.71%  (p=0.000 n=10+10)
Count/4K-6         50.1µs ± 0%      4.4µs ± 0%    -91.18%  (p=0.000 n=10+10)
Count/4M-6         48.1ms ± 1%      4.5ms ± 0%    -90.56%  (p=0.000 n=10+9)
Count/64M-6         784ms ± 0%       73ms ± 0%    -90.73%  (p=0.000 n=10+10)
CountEasy/10-6     28.4ns ± 0%     31.0ns ± 0%     +9.23%  (p=0.000 n=10+10)
CountEasy/32-6     30.6ns ± 0%     37.0ns ± 0%    +20.92%  (p=0.000 n=10+10)
CountEasy/4K-6      186ns ± 0%      198ns ± 0%     +6.45%  (p=0.000 n=9+10)
CountEasy/4M-6      233µs ± 2%      234µs ± 2%       ~     (p=0.912 n=10+10)
CountEasy/64M-6    6.70ms ± 0%     6.68ms ± 1%       ~     (p=0.762 n=8+10)

name             old speed      new speed       delta
Count/10-6        211MB/s ± 0%    272MB/s ± 0%    +28.77%  (p=0.000 n=10+9)
Count/32-6        112MB/s ± 0%    842MB/s ± 0%   +652.84%  (p=0.000 n=10+10)
Count/4K-6       81.8MB/s ± 0%  927.6MB/s ± 0%  +1033.63%  (p=0.000 n=10+9)
Count/4M-6       87.2MB/s ± 1%  924.0MB/s ± 0%   +959.25%  (p=0.000 n=10+9)
Count/64M-6      85.6MB/s ± 0%  922.9MB/s ± 0%   +978.31%  (p=0.000 n=10+10)
CountEasy/10-6    352MB/s ± 0%    322MB/s ± 0%     -8.41%  (p=0.000 n=10+10)
CountEasy/32-6   1.05GB/s ± 0%   0.87GB/s ± 0%    -17.35%  (p=0.000 n=9+10)
CountEasy/4K-6   22.0GB/s ± 0%   20.6GB/s ± 0%     -6.33%  (p=0.000 n=10+10)
CountEasy/4M-6   18.0GB/s ± 2%   18.0GB/s ± 2%       ~     (p=0.912 n=10+10)
CountEasy/64M-6  10.0GB/s ± 0%   10.0GB/s ± 1%       ~     (p=0.762 n=8+10)

On 386, without asm version of Index:

Count/10-6         57.0ns ± 0%     56.9ns ± 0%   -0.11%  (p=0.006 n=10+9)
Count/32-6          340ns ± 0%      274ns ± 0%  -19.48%  (p=0.000 n=10+9)
Count/4K-6         49.5µs ± 0%     37.1µs ± 0%  -24.96%  (p=0.000 n=10+10)
Count/4M-6         51.1ms ± 0%     38.2ms ± 0%  -25.21%  (p=0.000 n=10+10)
Count/64M-6         818ms ± 0%      613ms ± 0%  -25.07%  (p=0.000 n=8+10)
CountEasy/10-6     60.0ns ± 0%     70.4ns ± 0%  +17.34%  (p=0.000 n=10+10)
CountEasy/32-6     81.1ns ± 0%     94.0ns ± 0%  +15.97%  (p=0.000 n=9+10)
CountEasy/4K-6     4.37µs ± 0%     4.39µs ± 0%   +0.30%  (p=0.000 n=10+9)
CountEasy/4M-6     4.43ms ± 0%     4.43ms ± 0%     ~     (p=0.579 n=10+10)
CountEasy/64M-6    70.9ms ± 0%     70.9ms ± 0%     ~     (p=0.912 n=10+10)

name             old speed      new speed       delta
Count/10-6        176MB/s ± 0%    176MB/s ± 0%   +0.10%  (p=0.000 n=10+9)
Count/32-6       93.9MB/s ± 0%  116.5MB/s ± 0%  +24.06%  (p=0.000 n=10+9)
Count/4K-6       82.7MB/s ± 0%  110.3MB/s ± 0%  +33.26%  (p=0.000 n=10+10)
Count/4M-6       82.1MB/s ± 0%  109.7MB/s ± 0%  +33.70%  (p=0.000 n=10+10)
Count/64M-6      82.0MB/s ± 0%  109.5MB/s ± 0%  +33.46%  (p=0.000 n=8+10)
CountEasy/10-6    167MB/s ± 0%    142MB/s ± 0%  -14.75%  (p=0.000 n=9+10)
CountEasy/32-6    395MB/s ± 0%    340MB/s ± 0%  -13.77%  (p=0.000 n=10+10)
CountEasy/4K-6    936MB/s ± 0%    934MB/s ± 0%   -0.29%  (p=0.000 n=10+9)
CountEasy/4M-6    947MB/s ± 0%    946MB/s ± 0%     ~     (p=0.591 n=10+10)
CountEasy/64M-6   947MB/s ± 0%    947MB/s ± 0%     ~     (p=0.867 n=10+10)

Change-Id: Ia76b247372b6f5b5d23a9f10253a86536a5153b3
Reviewed-on: https://go-review.googlesource.com/36489Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
parent 04e0a762
...@@ -49,33 +49,19 @@ func explode(s []byte, n int) [][]byte { ...@@ -49,33 +49,19 @@ func explode(s []byte, n int) [][]byte {
// Count counts the number of non-overlapping instances of sep in s. // Count counts the number of non-overlapping instances of sep in s.
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. // If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
func Count(s, sep []byte) int { func Count(s, sep []byte) int {
n := len(sep) n := 0
if n == 0 { // special case
if len(sep) == 0 {
return utf8.RuneCount(s) + 1 return utf8.RuneCount(s) + 1
} }
if n > len(s) { for {
return 0 i := Index(s, sep)
} if i == -1 {
count := 0 return n
c := sep[0]
i := 0
t := s[:len(s)-n+1]
for i < len(t) {
if t[i] != c {
o := IndexByte(t[i:], c)
if o < 0 {
break
}
i += o
}
if n == 1 || Equal(s[i:i+n], sep) {
count++
i += n
continue
} }
i++ n++
s = s[i+len(sep):]
} }
return count
} }
// Contains reports whether subslice is within b. // Contains reports whether subslice is within b.
......
...@@ -76,18 +76,17 @@ func hashStrRev(sep string) (uint32, uint32) { ...@@ -76,18 +76,17 @@ func hashStrRev(sep string) (uint32, uint32) {
// If sep is an empty string, Count returns 1 + the number of Unicode code points in s. // If sep is an empty string, Count returns 1 + the number of Unicode code points in s.
func Count(s, sep string) int { func Count(s, sep string) int {
n := 0 n := 0
// special cases // special case
if len(sep) == 0 { if len(sep) == 0 {
return utf8.RuneCountInString(s) + 1 return utf8.RuneCountInString(s) + 1
} }
offset := 0
for { for {
i := Index(s[offset:], sep) i := Index(s, sep)
if i == -1 { if i == -1 {
return n return n
} }
n++ n++
offset += i + len(sep) s = s[i+len(sep):]
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment