Commit 92c826b1 authored by Russ Cox

cmd/internal/gc: inline runtime.getg

This more closely restores what the old C runtime did.
(In C, g was an 'extern register' with the same effective
implementation as in this CL.)

On a late 2012 MacBookPro10,2, best of 5 old vs best of 5 new:

benchmark                          old ns/op      new ns/op      delta
BenchmarkBinaryTree17              4981312777     4463426605     -10.40%
BenchmarkFannkuch11                3046495712     3006819428     -1.30%
BenchmarkFmtFprintfEmpty           89.3           79.8           -10.64%
BenchmarkFmtFprintfString          284            262            -7.75%
BenchmarkFmtFprintfInt             282            262            -7.09%
BenchmarkFmtFprintfIntInt          480            448            -6.67%
BenchmarkFmtFprintfPrefixedInt     382            358            -6.28%
BenchmarkFmtFprintfFloat           529            486            -8.13%
BenchmarkFmtManyArgs               1849           1773           -4.11%
BenchmarkGobDecode                 12835963       11794385       -8.11%
BenchmarkGobEncode                 10527170       10288422       -2.27%
BenchmarkGzip                      436109569      438422516      +0.53%
BenchmarkGunzip                    110121663      109843648      -0.25%
BenchmarkHTTPClientServer          81930          85446          +4.29%
BenchmarkJSONEncode                24638574       24280603       -1.45%
BenchmarkJSONDecode                93022423       85753546       -7.81%
BenchmarkMandelbrot200             4703899        4735407        +0.67%
BenchmarkGoParse                   5319853        5086843        -4.38%
BenchmarkRegexpMatchEasy0_32       151            151            +0.00%
BenchmarkRegexpMatchEasy0_1K       452            453            +0.22%
BenchmarkRegexpMatchEasy1_32       131            132            +0.76%
BenchmarkRegexpMatchEasy1_1K       761            722            -5.12%
BenchmarkRegexpMatchMedium_32      228            224            -1.75%
BenchmarkRegexpMatchMedium_1K      63751          64296          +0.85%
BenchmarkRegexpMatchHard_32        3188           3238           +1.57%
BenchmarkRegexpMatchHard_1K        95396          96756          +1.43%
BenchmarkRevcomp                   661587262      687107364      +3.86%
BenchmarkTemplate                  108312598      104008540      -3.97%
BenchmarkTimeParse                 453            459            +1.32%
BenchmarkTimeFormat                475            441            -7.16%

The garbage benchmark from the benchmarks subrepo gets 2.6% faster as well.

Change-Id: I320aeda332db81012688b26ffab23f6581c59cfa
Reviewed-on: https://go-review.googlesource.com/8460
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Rick Hudson <rlh@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent 8e95654a
......@@ -64,6 +64,7 @@ func main() {
gc.Thearch.Defframe = defframe
gc.Thearch.Excise = excise
gc.Thearch.Expandchecks = expandchecks
gc.Thearch.Getg = getg
gc.Thearch.Gins = gins
gc.Thearch.Ginscon = ginscon
gc.Thearch.Ginsnop = ginsnop
......
......@@ -494,3 +494,10 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
}
return false
}
// getg generates code for res = runtime.getg().
// On this architecture the value is taken from the register arm.REGG
// and moved into res.
func getg(res *gc.Node) {
	var greg gc.Node
	// Describe arm.REGG as a node with the same type as res.
	gc.Nodreg(&greg, res.Type, arm.REGG)
	gmove(&greg, res)
}
......@@ -96,6 +96,7 @@ func main() {
gc.Thearch.Dodiv = dodiv
gc.Thearch.Excise = excise
gc.Thearch.Expandchecks = expandchecks
gc.Thearch.Getg = getg
gc.Thearch.Gins = gins
gc.Thearch.Ginscon = ginscon
gc.Thearch.Ginsnop = ginsnop
......
......@@ -671,3 +671,20 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
}
return false
}
// getg generates code for res = runtime.getg().
// It emits two pointer-sized moves into a scratch register and then
// moves that register into res:
//
//	MOV TLS, tmp
//	MOV (tmp)(TLS*1), tmp
func getg(res *gc.Node) {
	var tmp gc.Node
	gc.Regalloc(&tmp, res.Type, res)
	as := optoas(gc.OAS, gc.Types[gc.Tptr])

	// First move: the source is the x86.REG_TLS register operand.
	loadTLS := gins(as, nil, &tmp)
	loadTLS.From.Type = obj.TYPE_REG
	loadTLS.From.Reg = x86.REG_TLS

	// Second move: start from a copy of the destination operand (tmp),
	// then turn it into a memory reference indexed by REG_TLS with scale 1.
	loadG := gins(as, nil, &tmp)
	loadG.From = loadG.To
	loadG.From.Type = obj.TYPE_MEM
	loadG.From.Index = x86.REG_TLS
	loadG.From.Scale = 1

	gmove(&tmp, res)
	gc.Regfree(&tmp)
}
......@@ -62,6 +62,7 @@ func main() {
gc.Thearch.Dodiv = dodiv
gc.Thearch.Excise = excise
gc.Thearch.Expandchecks = expandchecks
gc.Thearch.Getg = getg
gc.Thearch.Gins = gins
gc.Thearch.Ginscon = ginscon
gc.Thearch.Ginsnop = ginsnop
......
......@@ -532,3 +532,10 @@ func expandchecks(firstp *obj.Prog) {
p2.To.Offset = 0
}
}
// getg generates code for res = runtime.getg().
// On this architecture the value is taken from the register arm64.REGG
// and moved into res.
func getg(res *gc.Node) {
	var greg gc.Node
	// Describe arm64.REGG as a node with the same type as res.
	gc.Nodreg(&greg, res.Type, arm64.REGG)
	gmove(&greg, res)
}
......@@ -77,6 +77,7 @@ func main() {
gc.Thearch.Dodiv = cgen_div
gc.Thearch.Excise = excise
gc.Thearch.Expandchecks = expandchecks
gc.Thearch.Getg = getg
gc.Thearch.Gins = gins
gc.Thearch.Ginscon = ginscon
gc.Thearch.Ginsnop = ginsnop
......
......@@ -944,3 +944,20 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
}
return false
}
// getg generates code for res = runtime.getg().
// It emits two pointer-sized moves into a scratch register and then
// moves that register into res:
//
//	MOV TLS, tmp
//	MOV (tmp)(TLS*1), tmp
func getg(res *gc.Node) {
	var tmp gc.Node
	gc.Regalloc(&tmp, res.Type, res)
	as := optoas(gc.OAS, gc.Types[gc.Tptr])

	// First move: the source is the x86.REG_TLS register operand.
	loadTLS := gins(as, nil, &tmp)
	loadTLS.From.Type = obj.TYPE_REG
	loadTLS.From.Reg = x86.REG_TLS

	// Second move: start from a copy of the destination operand (tmp),
	// then turn it into a memory reference indexed by REG_TLS with scale 1.
	loadG := gins(as, nil, &tmp)
	loadG.From = loadG.To
	loadG.From.Type = obj.TYPE_MEM
	loadG.From.Index = x86.REG_TLS
	loadG.From.Scale = 1

	gmove(&tmp, res)
	gc.Regfree(&tmp)
}
......@@ -71,6 +71,7 @@ func main() {
gc.Thearch.Dodiv = dodiv
gc.Thearch.Excise = excise
gc.Thearch.Expandchecks = expandchecks
gc.Thearch.Getg = getg
gc.Thearch.Gins = gins
gc.Thearch.Ginscon = ginscon
gc.Thearch.Ginsnop = ginsnop
......
......@@ -549,3 +549,10 @@ func expandchecks(firstp *obj.Prog) {
p2.To.Offset = 0
}
}
// getg generates code for res = runtime.getg().
// On this architecture the value is taken from the register ppc64.REGG
// and moved into res.
func getg(res *gc.Node) {
	var greg gc.Node
	// Describe ppc64.REGG as a node with the same type as res.
	gc.Nodreg(&greg, res.Type, ppc64.REGG)
	gmove(&greg, res)
}
......@@ -418,6 +418,10 @@ func Cgen(n *Node, res *Node) {
Regfree(&n1)
return
case OGETG:
Thearch.Getg(res)
return
// symmetric binary
case OAND,
OOR,
......
......@@ -976,6 +976,7 @@ var opprec = []int{
OCONV: 8,
OCOPY: 8,
ODELETE: 8,
OGETG: 8,
OLEN: 8,
OLITERAL: 8,
OMAKESLICE: 8,
......@@ -1363,7 +1364,7 @@ func exprfmt(n *Node, prec int) string {
}
return fmt.Sprintf("%v(%v)", Oconv(int(n.Op), obj.FmtSharp), Hconv(n.List, obj.FmtComma))
case OCALL, OCALLFUNC, OCALLINTER, OCALLMETH:
case OCALL, OCALLFUNC, OCALLINTER, OCALLMETH, OGETG:
var f string
f += exprfmt(n.Left, nprec)
if n.Isddd {
......
......@@ -1002,6 +1002,10 @@ func gen(n *Node) {
case ORETURN, ORETJMP:
cgen_ret(n)
// Function calls turned into compiler intrinsics.
// At top level, can just ignore the call and make sure to preserve side effects in the argument, if any.
case OGETG:
// nothing
case OSQRT:
cgen_discard(n.Left)
......
......@@ -798,6 +798,7 @@ type Arch struct {
Dodiv func(int, *Node, *Node, *Node)
Excise func(*Flow)
Expandchecks func(*obj.Prog)
Getg func(*Node)
Gins func(int, *Node, *Node) *obj.Prog
Ginscon func(int, int64, *Node)
Ginsnop func()
......
......@@ -304,6 +304,7 @@ const (
ORETJMP // return to other function
OPS // compare parity set (for x86 NaN check)
OSQRT // sqrt(float64), on systems that have hw support
OGETG // runtime.getg() (read g pointer)
OEND
)
......
......@@ -1366,6 +1366,17 @@ OpSwitch:
t = t.Type
}
n.Type = t
if n.Op == OCALLFUNC && n.Left.Op == ONAME && (compiling_runtime != 0 || n.Left.Sym.Pkg == Runtimepkg) && n.Left.Sym.Name == "getg" {
// Emit code for runtime.getg() directly instead of calling function.
// Most such rewrites (for example the similar one for math.Sqrt) should be done in walk,
// so that the ordering pass can make sure to preserve the semantics of the original code
// (in particular, the exact time of the function call) by introducing temporaries.
// In this case, we know getg() always returns the same result within a given function
// and we want to avoid the temporaries, so we do the rewrite earlier than is typical.
n.Op = OGETG
}
break OpSwitch
}
......@@ -1376,6 +1387,7 @@ OpSwitch:
}
n.Type = getoutargx(l.Type)
break OpSwitch
case OCAP, OLEN, OREAL, OIMAG:
......
......@@ -179,7 +179,8 @@ func walkstmt(np **Node) {
OPRINTN,
OPANIC,
OEMPTY,
ORECOVER:
ORECOVER,
OGETG:
if n.Typecheck == 0 {
Fatal("missing typecheck: %v", Nconv(n, obj.FmtSign))
}
......@@ -424,7 +425,8 @@ func walkexpr(np **Node, init **NodeList) {
ONONAME,
OINDREG,
OEMPTY,
OPARAM:
OPARAM,
OGETG:
goto ret
case ONOT,
......
......@@ -1639,12 +1639,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0
// traceback from goexit1 must hit code range of goexit
BYTE $0x90 // NOP
// func getg() *g
// Out-of-line implementation of getg: reads g through TLS and returns it.
// NOSPLIT, no local frame, 4 bytes of arguments/results.
TEXT runtime·getg(SB),NOSPLIT,$0-4
// get_tls is a macro defined outside this view; presumably it loads the TLS base into CX.
get_tls(CX)
// AX = g, addressed relative to CX.
MOVL g(CX), AX
// Store g into the result slot.
MOVL AX, ret+0(FP)
RET
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
MOVL addr+0(FP), AX
PREFETCHT0 (AX)
......
......@@ -1673,12 +1673,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0
// traceback from goexit1 must hit code range of goexit
BYTE $0x90 // NOP
// func getg() *g
// Out-of-line implementation of getg: reads g through TLS and returns it.
// NOSPLIT, no local frame, 8 bytes of arguments/results.
TEXT runtime·getg(SB),NOSPLIT,$0-8
// get_tls is a macro defined outside this view; presumably it loads the TLS base into CX.
get_tls(CX)
// AX = g, addressed relative to CX.
MOVQ g(CX), AX
// Store g into the result slot.
MOVQ AX, ret+0(FP)
RET
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
MOVQ addr+0(FP), AX
PREFETCHT0 (AX)
......
......@@ -1096,12 +1096,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0
// traceback from goexit1 must hit code range of goexit
BYTE $0x90 // NOP
// func getg() *g
// Out-of-line implementation of getg: reads g through TLS and returns it.
// NOSPLIT, no local frame, 4 bytes of arguments/results.
TEXT runtime·getg(SB),NOSPLIT,$0-4
// get_tls is a macro defined outside this view; presumably it loads the TLS base into CX.
get_tls(CX)
// AX = g, addressed relative to CX.
MOVL g(CX), AX
// Store g into the result slot.
MOVL AX, ret+0(FP)
RET
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
MOVL addr+0(FP), AX
PREFETCHT0 (AX)
......
......@@ -984,10 +984,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-4-0
// traceback from goexit1 must hit code range of goexit
MOVW R0, R0 // NOP
// func getg() *g
// Out-of-line implementation of getg: copies g straight into the result slot.
// NOSPLIT; 4 bytes of arguments/results.
TEXT runtime·getg(SB),NOSPLIT,$-4-4
// g is used directly as the source operand here (no TLS lookup).
MOVW g, ret+0(FP)
RET
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
RET
......
......@@ -903,10 +903,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0
MOVD R0, R0 // NOP
BL runtime·goexit1(SB) // does not return
// func getg() *g
// Out-of-line implementation of getg: copies g straight into the result slot.
// NOSPLIT; 8 bytes of arguments/results.
TEXT runtime·getg(SB),NOSPLIT,$-8-8
// g is used directly as the source operand here (no TLS lookup).
MOVD g, ret+0(FP)
RET
// TODO(aram): use PRFM here.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
RET
......
......@@ -1134,10 +1134,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0
// traceback from goexit1 must hit code range of goexit
MOVD R0, R0 // NOP
// func getg() *g
// Out-of-line implementation of getg: copies g straight into the result slot.
// NOSPLIT; 8 bytes of arguments/results.
TEXT runtime·getg(SB),NOSPLIT,$-8-8
// g is used directly as the source operand here (no TLS lookup).
MOVD g, ret+0(FP)
RETURN
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
RETURN
......
......@@ -18,6 +18,9 @@ func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
return unsafe.Pointer(uintptr(p) + x)
}
// getg returns the pointer to the current g.
// The compiler rewrites calls to this function into instructions
// that fetch the g directly (from TLS or from the dedicated register).
// Only the signature is declared here; the declaration has no Go body.
func getg() *g
// mcall switches from the g to the g0 stack and invokes fn(g),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment