Commit 55337807, authored by Rui Ueyama, committed by Nigel Tao

strings: use Rabin-Karp algorithm for LastIndex.

benchmark                  old ns/op     new ns/op     delta
BenchmarkSingleMatch       49443         52275         +5.73%
BenchmarkIndex             28.8          27.4          -4.86%
BenchmarkLastIndex         14.5          14.0          -3.45%
BenchmarkLastIndexHard1    3982782       2309200       -42.02%
BenchmarkLastIndexHard2    3985562       2287715       -42.60%
BenchmarkLastIndexHard3    3555259       2282866       -35.79%

LGTM=josharian, nigeltao
R=golang-codereviews, ality, josharian, bradfitz, dave, nigeltao, gobot, nightlyone
CC=golang-codereviews
https://golang.org/cl/102560043
parent 933169fe
...@@ -43,13 +43,29 @@ func explode(s string, n int) []string { ...@@ -43,13 +43,29 @@ func explode(s string, n int) []string {
// primeRK is the prime base used in Rabin-Karp algorithm. // primeRK is the prime base used in Rabin-Karp algorithm.
const primeRK = 16777619 const primeRK = 16777619
// hashstr returns the hash and the appropriate multiplicative // hashStr returns the hash and the appropriate multiplicative
// factor for use in Rabin-Karp algorithm. // factor for use in Rabin-Karp algorithm.
func hashstr(sep string) (uint32, uint32) { func hashStr(sep string) (uint32, uint32) {
hash := uint32(0) hash := uint32(0)
for i := 0; i < len(sep); i++ { for i := 0; i < len(sep); i++ {
hash = hash*primeRK + uint32(sep[i]) hash = hash*primeRK + uint32(sep[i])
}
var pow, sq uint32 = 1, primeRK
for i := len(sep); i > 0; i >>= 1 {
if i&1 != 0 {
pow *= sq
}
sq *= sq
}
return hash, pow
}
// hashStrRev returns the hash of the reverse of sep and the
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
func hashStrRev(sep string) (uint32, uint32) {
hash := uint32(0)
for i := len(sep) - 1; i >= 0; i-- {
hash = hash*primeRK + uint32(sep[i])
} }
var pow, sq uint32 = 1, primeRK var pow, sq uint32 = 1, primeRK
for i := len(sep); i > 0; i >>= 1 { for i := len(sep); i > 0; i >>= 1 {
...@@ -85,7 +101,8 @@ func Count(s, sep string) int { ...@@ -85,7 +101,8 @@ func Count(s, sep string) int {
} }
return 0 return 0
} }
hashsep, pow := hashstr(sep) // Rabin-Karp search
hashsep, pow := hashStr(sep)
h := uint32(0) h := uint32(0)
for i := 0; i < len(sep); i++ { for i := 0; i < len(sep); i++ {
h = h*primeRK + uint32(s[i]) h = h*primeRK + uint32(s[i])
...@@ -139,8 +156,8 @@ func Index(s, sep string) int { ...@@ -139,8 +156,8 @@ func Index(s, sep string) int {
case n > len(s): case n > len(s):
return -1 return -1
} }
// Hash sep. // Rabin-Karp search
hashsep, pow := hashstr(sep) hashsep, pow := hashStr(sep)
var h uint32 var h uint32
for i := 0; i < n; i++ { for i := 0; i < n; i++ {
h = h*primeRK + uint32(s[i]) h = h*primeRK + uint32(s[i])
...@@ -163,22 +180,41 @@ func Index(s, sep string) int { ...@@ -163,22 +180,41 @@ func Index(s, sep string) int {
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
func LastIndex(s, sep string) int { func LastIndex(s, sep string) int {
n := len(sep) n := len(sep)
if n == 0 { switch {
case n == 0:
return len(s) return len(s)
} case n == 1:
c := sep[0]
if n == 1 {
// special case worth making fast // special case worth making fast
c := sep[0]
for i := len(s) - 1; i >= 0; i-- { for i := len(s) - 1; i >= 0; i-- {
if s[i] == c { if s[i] == c {
return i return i
} }
} }
return -1 return -1
case n == len(s):
if sep == s {
return 0
}
return -1
case n > len(s):
return -1
}
// Rabin-Karp search from the end of the string
hashsep, pow := hashStrRev(sep)
last := len(s) - n
var h uint32
for i := len(s) - 1; i >= last; i-- {
h = h*primeRK + uint32(s[i])
}
if h == hashsep && s[last:] == sep {
return last
} }
// n > 1 for i := last - 1; i >= 0; i-- {
for i := len(s) - n; i >= 0; i-- { h *= primeRK
if s[i] == c && s[i:i+n] == sep { h += uint32(s[i])
h -= pow * uint32(s[i+n])
if h == hashsep && s[i:i+n] == sep {
return i return i
} }
} }
......
...@@ -168,6 +168,15 @@ func BenchmarkIndex(b *testing.B) { ...@@ -168,6 +168,15 @@ func BenchmarkIndex(b *testing.B) {
} }
} }
func BenchmarkLastIndex(b *testing.B) {
if got := Index(benchmarkString, "v"); got != 17 {
b.Fatalf("wrong index: expected 17, got=%d", got)
}
for i := 0; i < b.N; i++ {
LastIndex(benchmarkString, "v")
}
}
func BenchmarkIndexByte(b *testing.B) { func BenchmarkIndexByte(b *testing.B) {
if got := IndexByte(benchmarkString, 'v'); got != 17 { if got := IndexByte(benchmarkString, 'v'); got != 17 {
b.Fatalf("wrong index: expected 17, got=%d", got) b.Fatalf("wrong index: expected 17, got=%d", got)
...@@ -1087,6 +1096,12 @@ func benchmarkIndexHard(b *testing.B, sep string) { ...@@ -1087,6 +1096,12 @@ func benchmarkIndexHard(b *testing.B, sep string) {
} }
} }
// benchmarkLastIndexHard calls LastIndex(benchInputHard, sep) b.N times.
// It is the shared body of the BenchmarkLastIndexHard* benchmarks, which
// differ only in the separator they pass.
func benchmarkLastIndexHard(b *testing.B, sep string) {
	for remaining := b.N; remaining > 0; remaining-- {
		LastIndex(benchInputHard, sep)
	}
}
func benchmarkCountHard(b *testing.B, sep string) { func benchmarkCountHard(b *testing.B, sep string) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
Count(benchInputHard, sep) Count(benchInputHard, sep)
...@@ -1097,6 +1112,10 @@ func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") } ...@@ -1097,6 +1112,10 @@ func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") } func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") } func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
// BenchmarkLastIndexHard1 runs the hard-input LastIndex benchmark with the
// two-byte separator "<>".
func BenchmarkLastIndexHard1(b *testing.B) {
	benchmarkLastIndexHard(b, "<>")
}

// BenchmarkLastIndexHard2 runs the hard-input LastIndex benchmark with the
// separator "</pre>".
func BenchmarkLastIndexHard2(b *testing.B) {
	benchmarkLastIndexHard(b, "</pre>")
}

// BenchmarkLastIndexHard3 runs the hard-input LastIndex benchmark with the
// separator "<b>hello world</b>".
func BenchmarkLastIndexHard3(b *testing.B) {
	benchmarkLastIndexHard(b, "<b>hello world</b>")
}
func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") } func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") } func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") } func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment