Commit 85fc7653 authored by Matthew Dempsky's avatar Matthew Dempsky

cmd/compile: optimize switch on strings

When compiling expression switches, we try to optimize runs of
constants into binary searches. The ordering used isn't visible to the
application, so it's unimportant as long as we're consistent between
sorting and searching.

For strings, it's much cheaper to compare string lengths than strings
themselves, so instead of ordering strings by "si <= sj", we currently
order them by "len(si) < len(sj) || len(si) == len(sj) && si <= sj"
(i.e., the lexicographical ordering on the 2-tuple (len(s), s)).

However, it's also somewhat cheaper to compare strings for equality
(i.e., ==) than for ordering (i.e., <=). And if there were two or
three string constants of the same length in a switch statement, we
might unnecessarily emit ordering comparisons.

For example, given:

    switch s {
    case "", "1", "2", "3": // ordered by length then content
        goto L
    }

we currently compile this as:

    if len(s) < 1 || len(s) == 1 && s <= "1" {
        if s == "" { goto L }
        else if s == "1" { goto L }
    } else {
        if s == "2" { goto L }
        else if s == "3" { goto L }
    }

This CL switches to using a 2-level binary search---first on len(s),
then on s itself---so that string ordering comparisons are only needed
when there are 4 or more strings of the same length. (4 being the
cut-off for when using binary search is actually worthwhile.)

So the above switch instead now compiles to:

    if len(s) == 0 {
        if s == "" { goto L }
    } else if len(s) == 1 {
        if s == "1" { goto L }
        else if s == "2" { goto L }
        else if s == "3" { goto L }
    }

which is better optimized by walk and SSA. (Notably, because there are
only two distinct lengths and no more than three strings of any
particular length, this example ends up falling back to simply using
linear search.)

Test case by khr@ from CL 195138.

Fixes #33934.

Change-Id: I8eeebcaf7e26343223be5f443d6a97a0daf84f07
Reviewed-on: https://go-review.googlesource.com/c/go/+/195340
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 770fac45
...@@ -349,24 +349,52 @@ func (s *exprSwitch) flush() { ...@@ -349,24 +349,52 @@ func (s *exprSwitch) flush() {
// (e.g., sort.Slice doesn't need to invoke the less function // (e.g., sort.Slice doesn't need to invoke the less function
// when there's only a single slice element). // when there's only a single slice element).
// Sort strings by length and then by value. if s.exprname.Type.IsString() && len(cc) >= 2 {
// It is much cheaper to compare lengths than values, // Sort strings by length and then by value. It is
// and all we need here is consistency. // much cheaper to compare lengths than values, and
// We respect this sorting below. // all we need here is consistency. We respect this
sort.Slice(cc, func(i, j int) bool { // sorting below.
vi := cc[i].lo.Val() sort.Slice(cc, func(i, j int) bool {
vj := cc[j].lo.Val() si := strlit(cc[i].lo)
sj := strlit(cc[j].lo)
if s.exprname.Type.IsString() {
si := vi.U.(string)
sj := vj.U.(string)
if len(si) != len(sj) { if len(si) != len(sj) {
return len(si) < len(sj) return len(si) < len(sj)
} }
return si < sj return si < sj
})
// runLen returns the string length associated with a
// particular run of exprClauses.
runLen := func(run []exprClause) int64 { return int64(len(strlit(run[0].lo))) }
// Collapse runs of consecutive strings with the same length.
var runs [][]exprClause
start := 0
for i := 1; i < len(cc); i++ {
if runLen(cc[start:]) != runLen(cc[i:]) {
runs = append(runs, cc[start:i])
start = i
}
} }
runs = append(runs, cc[start:])
// Perform two-level binary search.
nlen := nod(OLEN, s.exprname, nil)
binarySearch(len(runs), &s.done,
func(i int) *Node {
return nod(OLE, nlen, nodintconst(runLen(runs[i-1])))
},
func(i int, nif *Node) {
run := runs[i]
nif.Left = nod(OEQ, nlen, nodintconst(runLen(run)))
s.search(run, &nif.Nbody)
},
)
return
}
return compareOp(vi, OLT, vj) sort.Slice(cc, func(i, j int) bool {
return compareOp(cc[i].lo.Val(), OLT, cc[j].lo.Val())
}) })
// Merge consecutive integer cases. // Merge consecutive integer cases.
...@@ -383,19 +411,13 @@ func (s *exprSwitch) flush() { ...@@ -383,19 +411,13 @@ func (s *exprSwitch) flush() {
cc = merged cc = merged
} }
binarySearch(len(cc), &s.done, s.search(cc, &s.done)
}
func (s *exprSwitch) search(cc []exprClause, out *Nodes) {
binarySearch(len(cc), out,
func(i int) *Node { func(i int) *Node {
mid := cc[i-1].hi return nod(OLE, s.exprname, cc[i-1].hi)
le := nod(OLE, s.exprname, mid)
if s.exprname.Type.IsString() {
// Compare strings by length and then
// by value; see sort.Slice above.
lenlt := nod(OLT, nod(OLEN, s.exprname, nil), nod(OLEN, mid, nil))
leneq := nod(OEQ, nod(OLEN, s.exprname, nil), nod(OLEN, mid, nil))
le = nod(OOROR, lenlt, nod(OANDAND, leneq, le))
}
return le
}, },
func(i int, nif *Node) { func(i int, nif *Node) {
c := &cc[i] c := &cc[i]
......
// asmcheck
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// These tests check code generation of switch statements.
package codegen
// see issue 33934
func f(x string) int {
// amd64:-`cmpstring`
switch x {
case "":
return -1
case "1", "2", "3":
return -2
default:
return -3
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment