Commit e74c6cd3 authored by Martin Möhrmann, committed by Brad Fitzpatrick

regexp: add ASCII fast path for context methods

The step method implementations check directly if the next rune
only needs one byte to be decoded and avoid calling utf8.DecodeRune
for such ASCII characters.

Introduce the same fast path optimization for rune decoding
for the context methods.

Results for regexp benchmarks that use the context methods:

name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%    ~     (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)

Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 8a16d7d4
@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
 func (i *inputString) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
 func (i *inputBytes) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRune(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRune(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRune(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRune(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment