Commit 9b299c1e authored by Marcel van Lohuizen

unicode/utf8: removed uses of ranging over string

Ranging over string is much slower than using DecodeRuneInString.
See golang.org/issue/13162.

Replacing ranging over a string with the implementation of the Bytes
counterpart results in the following performance improvements:

name                                 old time/op  new time/op  delta
RuneCountInStringTenASCIIChars-8     43.0ns ± 1%  16.4ns ± 2%  -61.80%  (p=0.000 n=7+8)
RuneCountInStringTenJapaneseChars-8   161ns ± 2%   154ns ± 2%   -4.58%  (p=0.000 n=8+8)
ValidStringTenASCIIChars-8           52.2ns ± 1%  13.2ns ± 1%  -74.62%  (p=0.001 n=7+7)
ValidStringTenJapaneseChars-8         173ns ± 2%   153ns ± 2%  -11.78%  (p=0.000 n=7+8)

Update golang/go#13162

Change-Id: Ifc40a6a94bb3317f1f2d929d310bd2694645e9f6
Reviewed-on: https://go-review.googlesource.com/16695
Reviewed-by: Russ Cox <rsc@golang.org>
parent 25a28da0
...@@ -382,10 +382,16 @@ func RuneCount(p []byte) int { ...@@ -382,10 +382,16 @@ func RuneCount(p []byte) int {
// RuneCountInString is like RuneCount but its input is a string. // RuneCountInString is like RuneCount but its input is a string.
func RuneCountInString(s string) (n int) { func RuneCountInString(s string) (n int) {
for range s { for i := 0; i < len(s); {
n++ n++
if s[i] < RuneSelf {
i++
} else {
_, size := DecodeRuneInString(s[i:])
i += size
}
} }
return return n
} }
// RuneStart reports whether the byte could be the first byte of // RuneStart reports whether the byte could be the first byte of
...@@ -415,16 +421,18 @@ func Valid(p []byte) bool { ...@@ -415,16 +421,18 @@ func Valid(p []byte) bool {
// ValidString reports whether s consists entirely of valid UTF-8-encoded runes. // ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
func ValidString(s string) bool { func ValidString(s string) bool {
for i, r := range s { for i := 0; i < len(s); {
if r == RuneError { if s[i] < RuneSelf {
// The RuneError value can be an error i++
// sentinel value (if it's size 1) or the same } else {
// value encoded properly. Decode it to see if
// it's the 1 byte sentinel value.
_, size := DecodeRuneInString(s[i:]) _, size := DecodeRuneInString(s[i:])
if size == 1 { if size == 1 {
// All valid runes of size 1 (those
// below RuneSelf) were handled above.
// This must be a RuneError.
return false return false
} }
i += size
} }
} }
return true return true
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment