Commit 13cb62c7 authored by Josh Bleecher Snyder

cmd/internal/gc, cmd/6g: generate boolean values without jumps

Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.

For example, given

func f(i, j int) bool {
	return i == j
}

Before

"".f t=1 size=32 value=0 args=0x18 locals=0x0
	0x0000 00000 (x.go:3)	TEXT	"".f(SB), $0-24
	0x0000 00000 (x.go:3)	FUNCDATA	$0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
	0x0000 00000 (x.go:3)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0000 00000 (x.go:4)	MOVQ	"".i+8(FP), BX
	0x0005 00005 (x.go:4)	MOVQ	"".j+16(FP), BP
	0x000a 00010 (x.go:4)	CMPQ	BX, BP
	0x000d 00013 (x.go:4)	JEQ	21
	0x000f 00015 (x.go:4)	MOVB	$0, "".~r2+24(FP)
	0x0014 00020 (x.go:4)	RET
	0x0015 00021 (x.go:4)	MOVB	$1, "".~r2+24(FP)
	0x001a 00026 (x.go:4)	JMP	20

After

"".f t=1 size=32 value=0 args=0x18 locals=0x0
	0x0000 00000 (x.go:3)	TEXT	"".f(SB), $0-24
	0x0000 00000 (x.go:3)	FUNCDATA	$0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
	0x0000 00000 (x.go:3)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0000 00000 (x.go:4)	MOVQ	"".i+8(FP), BX
	0x0005 00005 (x.go:4)	MOVQ	"".j+16(FP), BP
	0x000a 00010 (x.go:4)	CMPQ	BX, BP
	0x000d 00013 (x.go:4)	SETEQ	"".~r2+24(FP)
	0x0012 00018 (x.go:4)	RET

regexp benchmarks, best of 12 runs:

benchmark                                 old ns/op      new ns/op      delta
BenchmarkNotOnePassShortB                 782            733            -6.27%
BenchmarkLiteral                          180            171            -5.00%
BenchmarkNotLiteral                       2855           2721           -4.69%
BenchmarkMatchHard_32                     2672           2557           -4.30%
BenchmarkMatchHard_1K                     80182          76732          -4.30%
BenchmarkMatchEasy1_32M                   76440180       73304748       -4.10%
BenchmarkMatchEasy1_32K                   68798          66350          -3.56%
BenchmarkAnchoredLongMatch                482            465            -3.53%
BenchmarkMatchEasy1_1M                    2373042        2292692        -3.39%
BenchmarkReplaceAll                       2776           2690           -3.10%
BenchmarkNotOnePassShortA                 1397           1360           -2.65%
BenchmarkMatchClass_InRange               3842           3742           -2.60%
BenchmarkMatchEasy0_32                    125            122            -2.40%
BenchmarkMatchEasy0_32K                   11414          11164          -2.19%
BenchmarkMatchEasy0_1K                    668            654            -2.10%
BenchmarkAnchoredShortMatch               260            255            -1.92%
BenchmarkAnchoredLiteralShortNonMatch     164            161            -1.83%
BenchmarkOnePassShortB                    623            612            -1.77%
BenchmarkOnePassShortA                    801            788            -1.62%
BenchmarkMatchClass                       4094           4033           -1.49%
BenchmarkMatchEasy0_32M                   14078800       13890704       -1.34%
BenchmarkMatchHard_32K                    4095844        4045820        -1.22%
BenchmarkMatchEasy1_1K                    1663           1643           -1.20%
BenchmarkMatchHard_1M                     131261708      129708215      -1.18%
BenchmarkMatchHard_32M                    4210112412     4169292003     -0.97%
BenchmarkMatchMedium_32K                  2460752        2438611        -0.90%
BenchmarkMatchEasy0_1M                    422914         419672         -0.77%
BenchmarkMatchMedium_1M                   78581121       78040160       -0.69%
BenchmarkMatchMedium_32M                  2515287278     2498464906     -0.67%
BenchmarkMatchMedium_32                   1754           1746           -0.46%
BenchmarkMatchMedium_1K                   52105          52106          +0.00%
BenchmarkAnchoredLiteralLongNonMatch      185            185            +0.00%
BenchmarkMatchEasy1_32                    107            107            +0.00%
BenchmarkOnePassLongNotPrefix             505            505            +0.00%
BenchmarkOnePassLongPrefix                147            147            +0.00%

The godoc binary is ~0.12% smaller after this CL.

Updates #5729.

toolstash -cmp passes for all architectures other than amd64 and amd64p32.

Other architectures can be done in follow-up CLs.

Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
parent 9c1868d0
...@@ -10,11 +10,11 @@ import ( ...@@ -10,11 +10,11 @@ import (
"cmd/internal/obj/x86" "cmd/internal/obj/x86"
) )
var thechar int = '6' var (
thechar int = '6'
var thestring string = "amd64" thestring string = "amd64"
thelinkarch *obj.LinkArch = &x86.Linkamd64
var thelinkarch *obj.LinkArch = &x86.Linkamd64 )
func linkarchinit() { func linkarchinit() {
if obj.Getgoarch() == "amd64p32" { if obj.Getgoarch() == "amd64p32" {
...@@ -27,13 +27,12 @@ func linkarchinit() { ...@@ -27,13 +27,12 @@ func linkarchinit() {
var MAXWIDTH int64 = 1 << 50 var MAXWIDTH int64 = 1 << 50
var addptr int = x86.AADDQ var (
addptr int = x86.AADDQ
var movptr int = x86.AMOVQ movptr int = x86.AMOVQ
leaptr int = x86.ALEAQ
var leaptr int = x86.ALEAQ cmpptr int = x86.ACMPQ
)
var cmpptr int = x86.ACMPQ
/* /*
* go declares several platform-specific type aliases: * go declares several platform-specific type aliases:
...@@ -101,6 +100,7 @@ func main() { ...@@ -101,6 +100,7 @@ func main() {
gc.Thearch.Expandchecks = expandchecks gc.Thearch.Expandchecks = expandchecks
gc.Thearch.Getg = getg gc.Thearch.Getg = getg
gc.Thearch.Gins = gins gc.Thearch.Gins = gins
gc.Thearch.Ginsboolval = ginsboolval
gc.Thearch.Ginscon = ginscon gc.Thearch.Ginscon = ginscon
gc.Thearch.Ginsnop = ginsnop gc.Thearch.Ginsnop = ginsnop
gc.Thearch.Gmove = gmove gc.Thearch.Gmove = gmove
......
...@@ -99,6 +99,10 @@ func ginscon(as int, c int64, n2 *gc.Node) { ...@@ -99,6 +99,10 @@ func ginscon(as int, c int64, n2 *gc.Node) {
gins(as, &n1, n2) gins(as, &n1, n2)
} }
// ginsboolval emits the instruction that stores the outcome of a
// just-completed comparison into n as a boolean value.
// a is the conditional jump opcode describing the desired condition;
// it is translated to the matching SETxx opcode before being emitted.
func ginsboolval(a int, n *gc.Node) {
	setop := jmptoset(a)
	gins(setop, nil, n)
}
/* /*
* set up nodes representing 2^63 * set up nodes representing 2^63
*/ */
...@@ -698,6 +702,21 @@ func optoas(op int, t *gc.Type) int { ...@@ -698,6 +702,21 @@ func optoas(op int, t *gc.Type) int {
gc.OPS<<16 | gc.TFLOAT64: gc.OPS<<16 | gc.TFLOAT64:
a = x86.AJPS a = x86.AJPS
case gc.OPC<<16 | gc.TBOOL,
gc.OPC<<16 | gc.TINT8,
gc.OPC<<16 | gc.TUINT8,
gc.OPC<<16 | gc.TINT16,
gc.OPC<<16 | gc.TUINT16,
gc.OPC<<16 | gc.TINT32,
gc.OPC<<16 | gc.TUINT32,
gc.OPC<<16 | gc.TINT64,
gc.OPC<<16 | gc.TUINT64,
gc.OPC<<16 | gc.TPTR32,
gc.OPC<<16 | gc.TPTR64,
gc.OPC<<16 | gc.TFLOAT32,
gc.OPC<<16 | gc.TFLOAT64:
a = x86.AJPC
case gc.OLT<<16 | gc.TINT8, case gc.OLT<<16 | gc.TINT8,
gc.OLT<<16 | gc.TINT16, gc.OLT<<16 | gc.TINT16,
gc.OLT<<16 | gc.TINT32, gc.OLT<<16 | gc.TINT32,
...@@ -902,7 +921,8 @@ func optoas(op int, t *gc.Type) int { ...@@ -902,7 +921,8 @@ func optoas(op int, t *gc.Type) int {
gc.OMINUS<<16 | gc.TPTR64: gc.OMINUS<<16 | gc.TPTR64:
a = x86.ANEGQ a = x86.ANEGQ
case gc.OAND<<16 | gc.TINT8, case gc.OAND<<16 | gc.TBOOL,
gc.OAND<<16 | gc.TINT8,
gc.OAND<<16 | gc.TUINT8: gc.OAND<<16 | gc.TUINT8:
a = x86.AANDB a = x86.AANDB
...@@ -920,7 +940,8 @@ func optoas(op int, t *gc.Type) int { ...@@ -920,7 +940,8 @@ func optoas(op int, t *gc.Type) int {
gc.OAND<<16 | gc.TPTR64: gc.OAND<<16 | gc.TPTR64:
a = x86.AANDQ a = x86.AANDQ
case gc.OOR<<16 | gc.TINT8, case gc.OOR<<16 | gc.TBOOL,
gc.OOR<<16 | gc.TINT8,
gc.OOR<<16 | gc.TUINT8: gc.OOR<<16 | gc.TUINT8:
a = x86.AORB a = x86.AORB
...@@ -1134,6 +1155,46 @@ func optoas(op int, t *gc.Type) int { ...@@ -1134,6 +1155,46 @@ func optoas(op int, t *gc.Type) int {
return a return a
} }
// jmptoset returns ASETxx for AJxx.
func jmptoset(jmp int) int {
switch jmp {
case x86.AJEQ:
return x86.ASETEQ
case x86.AJNE:
return x86.ASETNE
case x86.AJLT:
return x86.ASETLT
case x86.AJCS:
return x86.ASETCS
case x86.AJLE:
return x86.ASETLE
case x86.AJLS:
return x86.ASETLS
case x86.AJGT:
return x86.ASETGT
case x86.AJHI:
return x86.ASETHI
case x86.AJGE:
return x86.ASETGE
case x86.AJCC:
return x86.ASETCC
case x86.AJMI:
return x86.ASETMI
case x86.AJOC:
return x86.ASETOC
case x86.AJOS:
return x86.ASETOS
case x86.AJPC:
return x86.ASETPC
case x86.AJPL:
return x86.ASETPL
case x86.AJPS:
return x86.ASETPS
}
gc.Fatal("jmptoset: no entry for %v", gc.Oconv(jmp, 0))
panic("unreachable")
}
const ( const (
ODynam = 1 << 0 ODynam = 1 << 0
OAddable = 1 << 1 OAddable = 1 << 1
......
...@@ -196,6 +196,22 @@ var progtable = [x86.ALAST]obj.ProgInfo{ ...@@ -196,6 +196,22 @@ var progtable = [x86.ALAST]obj.ProgInfo{
x86.ASBBL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0}, x86.ASBBL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
x86.ASBBQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0}, x86.ASBBQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
x86.ASBBW: {gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0}, x86.ASBBW: {gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
x86.ASETCC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETCS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETEQ: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETGE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETGT: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETHI: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETLE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETLS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETLT: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETMI: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETNE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETOC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETOS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETPC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETPL: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETPS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASHLB: {gc.SizeB | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0}, x86.ASHLB: {gc.SizeB | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
x86.ASHLL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0}, x86.ASHLL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
x86.ASHLQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0}, x86.ASHLQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
......
This diff is collapsed.
...@@ -14,7 +14,7 @@ func overlap_cplx(f *Node, t *Node) bool { ...@@ -14,7 +14,7 @@ func overlap_cplx(f *Node, t *Node) bool {
return f.Op == OINDREG && t.Op == OINDREG && f.Xoffset+f.Type.Width >= t.Xoffset && t.Xoffset+t.Type.Width >= f.Xoffset return f.Op == OINDREG && t.Op == OINDREG && f.Xoffset+f.Type.Width >= t.Xoffset && t.Xoffset+t.Type.Width >= f.Xoffset
} }
func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) { func complexbool(op int, nl, nr, res *Node, wantTrue bool, likely int, to *obj.Prog) {
// make both sides addable in ullman order // make both sides addable in ullman order
if nr != nil { if nr != nil {
if nl.Ullman > nr.Ullman && !nl.Addable { if nl.Ullman > nr.Ullman && !nl.Addable {
...@@ -35,7 +35,10 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) ...@@ -35,7 +35,10 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog)
subnode(&rreal, &rimag, nr) subnode(&rreal, &rimag, nr)
// build tree // build tree
// real(l) == real(r) && imag(l) == imag(r) // if branching:
// real(l) == real(r) && imag(l) == imag(r)
// if generating a value, use a branch-free version:
// real(l) == real(r) & imag(l) == imag(r)
realeq := Node{ realeq := Node{
Op: OEQ, Op: OEQ,
Left: &lreal, Left: &lreal,
...@@ -55,6 +58,19 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) ...@@ -55,6 +58,19 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog)
Type: Types[TBOOL], Type: Types[TBOOL],
} }
if res != nil {
// generating a value
and.Op = OAND
if op == ONE {
and.Op = OOR
realeq.Op = ONE
imageq.Op = ONE
}
Bvgen(&and, res, true)
return
}
// generating a branch
if op == ONE { if op == ONE {
wantTrue = !wantTrue wantTrue = !wantTrue
} }
......
...@@ -791,6 +791,13 @@ type Arch struct { ...@@ -791,6 +791,13 @@ type Arch struct {
Expandchecks func(*obj.Prog) Expandchecks func(*obj.Prog)
Getg func(*Node) Getg func(*Node)
Gins func(int, *Node, *Node) *obj.Prog Gins func(int, *Node, *Node) *obj.Prog
// Ginsboolval inserts instructions to convert the result
// of a just-completed comparison to a boolean value.
// The first argument is the conditional jump instruction
// corresponding to the desired value.
// The second argument is the destination.
// If not present, Ginsboolval will be emulated with jumps.
Ginsboolval func(int, *Node)
Ginscon func(int, int64, *Node) Ginscon func(int, int64, *Node)
Ginsnop func() Ginsnop func()
Gmove func(*Node, *Node) Gmove func(*Node, *Node)
......
...@@ -306,6 +306,7 @@ const ( ...@@ -306,6 +306,7 @@ const (
ORROTC // right rotate-carry: ARCR. ORROTC // right rotate-carry: ARCR.
ORETJMP // return to other function ORETJMP // return to other function
OPS // compare parity set (for x86 NaN check) OPS // compare parity set (for x86 NaN check)
OPC // compare parity clear (for x86 NaN check)
OSQRT // sqrt(float64), on systems that have hw support OSQRT // sqrt(float64), on systems that have hw support
OGETG // runtime.getg() (read g pointer) OGETG // runtime.getg() (read g pointer)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment