Commit a9581e2e authored by Alberto Donizetti's avatar Alberto Donizetti Committed by Brad Fitzpatrick

unicode/utf16: speed up and clean up Encode and EncodeRune

name                        old time/op  new time/op  delta
EncodeValidASCII-4          74.1ns ± 1%  70.1ns ± 1%   -5.46%  (p=0.000 n=10+10)
EncodeValidJapaneseChars-4  61.3ns ± 0%  58.9ns ± 0%   -3.82%  (p=0.000 n=10+10)
EncodeRune-4                13.1ns ± 1%   9.8ns ± 0%  -25.24%   (p=0.000 n=10+9)

Fixes #6957

Change-Id: I9dde6d77420c34c6e2ef3e6213bb6be9b58a3074
Reviewed-on: https://go-review.googlesource.com/19891Reviewed-by: default avatarRob Pike <r@golang.org>
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 47b04228
...@@ -45,7 +45,7 @@ func DecodeRune(r1, r2 rune) rune { ...@@ -45,7 +45,7 @@ func DecodeRune(r1, r2 rune) rune {
// If the rune is not a valid Unicode code point or does not need encoding, // If the rune is not a valid Unicode code point or does not need encoding,
// EncodeRune returns U+FFFD, U+FFFD. // EncodeRune returns U+FFFD, U+FFFD.
func EncodeRune(r rune) (r1, r2 rune) { func EncodeRune(r rune) (r1, r2 rune) {
if r < surrSelf || r > maxRune || IsSurrogate(r) { if r < surrSelf || r > maxRune {
return replacementChar, replacementChar return replacementChar, replacementChar
} }
r -= surrSelf r -= surrSelf
...@@ -65,20 +65,22 @@ func Encode(s []rune) []uint16 { ...@@ -65,20 +65,22 @@ func Encode(s []rune) []uint16 {
n = 0 n = 0
for _, v := range s { for _, v := range s {
switch { switch {
case v < 0, surr1 <= v && v < surr3, v > maxRune: case 0 <= v && v < surr1, surr3 <= v && v < surrSelf:
v = replacementChar // normal rune
fallthrough
case v < surrSelf:
a[n] = uint16(v) a[n] = uint16(v)
n++ n++
default: case surrSelf <= v && v <= maxRune:
// needs surrogate sequence
r1, r2 := EncodeRune(v) r1, r2 := EncodeRune(v)
a[n] = uint16(r1) a[n] = uint16(r1)
a[n+1] = uint16(r2) a[n+1] = uint16(r2)
n += 2 n += 2
default:
a[n] = uint16(replacementChar)
n++
} }
} }
return a[0:n] return a[:n]
} }
// Decode returns the Unicode code point sequence represented // Decode returns the Unicode code point sequence represented
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment