Commit 0cff219c authored by Ilya Tocar's avatar Ilya Tocar

strings: use AVX2 for Index if available

IndexHard4-4      1.50ms ± 2%  0.71ms ± 0%  -52.36%  (p=0.000 n=20+19)

This also fixes a bug, that caused a string of length 16 to use
two 8-byte comparisons instead of one 16-byte. And adds a test for
cases when partial_match fails.

Change-Id: I1ee8fc4e068bb36c95c45de78f067c822c0d9df0
Reviewed-on: https://go-review.googlesource.com/22551
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 83c73a85
...@@ -9,7 +9,17 @@ package bytes ...@@ -9,7 +9,17 @@ package bytes
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s. // indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
// indexShortStr requires 2 <= len(c) <= shortStringLen // indexShortStr requires 2 <= len(c) <= shortStringLen
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
const shortStringLen = 31 func supportAVX2() bool // ../runtime/asm_$GOARCH.s
var shortStringLen int
func init() {
if supportAVX2() {
shortStringLen = 63
} else {
shortStringLen = 31
}
}
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
func Index(s, sep []byte) int { func Index(s, sep []byte) int {
......
...@@ -1695,6 +1695,16 @@ big_loop_avx2_exit: ...@@ -1695,6 +1695,16 @@ big_loop_avx2_exit:
JMP loop JMP loop
TEXT strings·supportAVX2(SB),NOSPLIT,$0-1
MOVBLZX runtime·support_avx2(SB), AX
MOVB AX, ret+0(FP)
RET
TEXT bytes·supportAVX2(SB),NOSPLIT,$0-1
MOVBLZX runtime·support_avx2(SB), AX
MOVB AX, ret+0(FP)
RET
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40 TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
MOVQ s+0(FP), DI MOVQ s+0(FP), DI
// We want len in DX and AX, because PCMPESTRI implicitly consumes them // We want len in DX and AX, because PCMPESTRI implicitly consumes them
...@@ -1809,7 +1819,7 @@ loop8: ...@@ -1809,7 +1819,7 @@ loop8:
JB loop8 JB loop8
JMP fail JMP fail
_9_or_more: _9_or_more:
CMPQ AX, $16 CMPQ AX, $15
JA _16_or_more JA _16_or_more
LEAQ 1(DI)(DX*1), DX LEAQ 1(DI)(DX*1), DX
SUBQ AX, DX SUBQ AX, DX
...@@ -1833,7 +1843,7 @@ partial_success9to15: ...@@ -1833,7 +1843,7 @@ partial_success9to15:
JMP fail JMP fail
_16_or_more: _16_or_more:
CMPQ AX, $16 CMPQ AX, $16
JA _17_to_31 JA _17_or_more
MOVOU (BP), X1 MOVOU (BP), X1
LEAQ -15(DI)(DX*1), DX LEAQ -15(DI)(DX*1), DX
loop16: loop16:
...@@ -1846,7 +1856,9 @@ loop16: ...@@ -1846,7 +1856,9 @@ loop16:
CMPQ DI,DX CMPQ DI,DX
JB loop16 JB loop16
JMP fail JMP fail
_17_to_31: _17_or_more:
CMPQ AX, $31
JA _32_or_more
LEAQ 1(DI)(DX*1), DX LEAQ 1(DI)(DX*1), DX
SUBQ AX, DX SUBQ AX, DX
MOVOU -16(BP)(AX*1), X0 MOVOU -16(BP)(AX*1), X0
...@@ -1870,9 +1882,56 @@ partial_success17to31: ...@@ -1870,9 +1882,56 @@ partial_success17to31:
ADDQ $1,DI ADDQ $1,DI
CMPQ DI,DX CMPQ DI,DX
JB loop17to31 JB loop17to31
JMP fail
// We can get here only when AVX2 is enabled and cutoff for indexShortStr is set to 63
// So no need to check cpuid
_32_or_more:
CMPQ AX, $32
JA _33_to_63
VMOVDQU (BP), Y1
LEAQ -31(DI)(DX*1), DX
loop32:
VMOVDQU (DI), Y2
VPCMPEQB Y1, Y2, Y3
VPMOVMSKB Y3, SI
CMPL SI, $0xffffffff
JE success_avx2
ADDQ $1,DI
CMPQ DI,DX
JB loop32
JMP fail_avx2
_33_to_63:
LEAQ 1(DI)(DX*1), DX
SUBQ AX, DX
VMOVDQU -32(BP)(AX*1), Y0
VMOVDQU (BP), Y1
loop33to63:
VMOVDQU (DI), Y2
VPCMPEQB Y1, Y2, Y3
VPMOVMSKB Y3, SI
CMPL SI, $0xffffffff
JE partial_success33to63
ADDQ $1,DI
CMPQ DI,DX
JB loop33to63
JMP fail_avx2
partial_success33to63:
VMOVDQU -32(AX)(DI*1), Y3
VPCMPEQB Y0, Y3, Y4
VPMOVMSKB Y4, SI
CMPL SI, $0xffffffff
JE success_avx2
ADDQ $1,DI
CMPQ DI,DX
JB loop33to63
fail_avx2:
VZEROUPPER
fail: fail:
MOVQ $-1, (R11) MOVQ $-1, (R11)
RET RET
success_avx2:
VZEROUPPER
JMP success
sse42: sse42:
MOVL runtime·cpuid_ecx(SB), CX MOVL runtime·cpuid_ecx(SB), CX
ANDL $0x100000, CX ANDL $0x100000, CX
......
...@@ -9,7 +9,17 @@ package strings ...@@ -9,7 +9,17 @@ package strings
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s. // indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
// indexShortStr requires 2 <= len(c) <= shortStringLen // indexShortStr requires 2 <= len(c) <= shortStringLen
func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
const shortStringLen = 31 func supportAVX2() bool // ../runtime/asm_$GOARCH.s
var shortStringLen int
func init() {
if supportAVX2() {
shortStringLen = 63
} else {
shortStringLen = 31
}
}
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
func Index(s, sep string) int { func Index(s, sep string) int {
......
...@@ -86,32 +86,44 @@ var indexTests = []IndexTest{ ...@@ -86,32 +86,44 @@ var indexTests = []IndexTest{
{"32145678", "01234567", -1}, {"32145678", "01234567", -1},
{"01234567", "01234567", 0}, {"01234567", "01234567", 0},
{"x01234567", "01234567", 1}, {"x01234567", "01234567", 1},
{"x0123456x01234567", "01234567", 9},
{"xx01234567"[:9], "01234567", -1}, {"xx01234567"[:9], "01234567", -1},
{"", "0123456789", -1}, {"", "0123456789", -1},
{"3214567844", "0123456789", -1}, {"3214567844", "0123456789", -1},
{"0123456789", "0123456789", 0}, {"0123456789", "0123456789", 0},
{"x0123456789", "0123456789", 1}, {"x0123456789", "0123456789", 1},
{"x012345678x0123456789", "0123456789", 11},
{"xyz0123456789"[:12], "0123456789", -1}, {"xyz0123456789"[:12], "0123456789", -1},
{"x01234567x89", "0123456789", -1}, {"x01234567x89", "0123456789", -1},
{"", "0123456789012345", -1}, {"", "0123456789012345", -1},
{"3214567889012345", "0123456789012345", -1}, {"3214567889012345", "0123456789012345", -1},
{"0123456789012345", "0123456789012345", 0}, {"0123456789012345", "0123456789012345", 0},
{"x0123456789012345", "0123456789012345", 1}, {"x0123456789012345", "0123456789012345", 1},
{"x012345678901234x0123456789012345", "0123456789012345", 17},
{"", "01234567890123456789", -1}, {"", "01234567890123456789", -1},
{"32145678890123456789", "01234567890123456789", -1}, {"32145678890123456789", "01234567890123456789", -1},
{"01234567890123456789", "01234567890123456789", 0}, {"01234567890123456789", "01234567890123456789", 0},
{"x01234567890123456789", "01234567890123456789", 1}, {"x01234567890123456789", "01234567890123456789", 1},
{"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
{"xyz01234567890123456789"[:22], "01234567890123456789", -1}, {"xyz01234567890123456789"[:22], "01234567890123456789", -1},
{"", "0123456789012345678901234567890", -1}, {"", "0123456789012345678901234567890", -1},
{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1}, {"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
{"0123456789012345678901234567890", "0123456789012345678901234567890", 0}, {"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1}, {"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
{"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1}, {"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
{"", "01234567890123456789012345678901", -1}, {"", "01234567890123456789012345678901", -1},
{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1}, {"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
{"01234567890123456789012345678901", "01234567890123456789012345678901", 0}, {"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1}, {"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
{"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1}, {"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
{"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
{"", "0123456789012345678901234567890123456789", -1},
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
{"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
} }
var lastIndexTests = []IndexTest{ var lastIndexTests = []IndexTest{
...@@ -1315,6 +1327,9 @@ func benchmarkCountHard(b *testing.B, sep string) { ...@@ -1315,6 +1327,9 @@ func benchmarkCountHard(b *testing.B, sep string) {
func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") } func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") } func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") } func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
func BenchmarkIndexHard4(b *testing.B) {
benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
}
func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") } func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") } func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment