Commit 15817e40 authored by Michael Munday

cmd/compile: make link register allocatable in non-leaf functions

We save and restore the link register in non-leaf functions because
it is clobbered by CALLs. It is therefore available for general
purpose use.

Only enabled on s390x currently. The RC4 benchmarks in particular
benefit from the extra register:

name     old speed     new speed     delta
RC4_128  243MB/s ± 2%  341MB/s ± 2%  +40.46%  (p=0.008 n=5+5)
RC4_1K   267MB/s ± 0%  359MB/s ± 1%  +34.32%  (p=0.008 n=5+5)
RC4_8K   271MB/s ± 0%  362MB/s ± 0%  +33.61%  (p=0.008 n=5+5)

Change-Id: Id23bff95e771da9425353da2f32668b8e34ba09f
Reviewed-on: https://go-review.googlesource.com/30597
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 809a1de1
......@@ -25,6 +25,7 @@ type Config struct {
fpRegMask regMask // floating point register mask
specialRegMask regMask // special register mask
FPReg int8 // register number of frame pointer, -1 if not used
LinkReg int8 // register number of link register if it is a general purpose register, -1 if not used
hasGReg bool // has hardware g register
fe Frontend // callbacks into compiler frontend
HTML *HTMLWriter // html writer, for debugging
......@@ -143,6 +144,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskAMD64
c.fpRegMask = fpRegMaskAMD64
c.FPReg = framepointerRegAMD64
c.LinkReg = linkRegAMD64
c.hasGReg = false
case "amd64p32":
c.IntSize = 4
......@@ -154,6 +156,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskAMD64
c.fpRegMask = fpRegMaskAMD64
c.FPReg = framepointerRegAMD64
c.LinkReg = linkRegAMD64
c.hasGReg = false
c.noDuffDevice = true
case "386":
......@@ -166,6 +169,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMask386
c.fpRegMask = fpRegMask386
c.FPReg = framepointerReg386
c.LinkReg = linkReg386
c.hasGReg = false
case "arm":
c.IntSize = 4
......@@ -177,6 +181,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskARM
c.fpRegMask = fpRegMaskARM
c.FPReg = framepointerRegARM
c.LinkReg = linkRegARM
c.hasGReg = true
case "arm64":
c.IntSize = 8
......@@ -188,6 +193,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskARM64
c.fpRegMask = fpRegMaskARM64
c.FPReg = framepointerRegARM64
c.LinkReg = linkRegARM64
c.hasGReg = true
c.noDuffDevice = obj.GOOS == "darwin" // darwin linker cannot handle BR26 reloc with non-zero addend
case "ppc64":
......@@ -203,6 +209,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskPPC64
c.fpRegMask = fpRegMaskPPC64
c.FPReg = framepointerRegPPC64
c.LinkReg = linkRegPPC64
c.noDuffDevice = true // TODO: Resolve PPC64 DuffDevice (has zero, but not copy)
c.NeedsFpScratch = true
c.hasGReg = true
......@@ -217,6 +224,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.fpRegMask = fpRegMaskMIPS64
c.specialRegMask = specialRegMaskMIPS64
c.FPReg = framepointerRegMIPS64
c.LinkReg = linkRegMIPS64
c.hasGReg = true
case "s390x":
c.IntSize = 8
......@@ -228,6 +236,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskS390X
c.fpRegMask = fpRegMaskS390X
c.FPReg = framepointerRegS390X
c.LinkReg = linkRegS390X
c.hasGReg = true
c.noDuffDevice = true
default:
......
......@@ -501,5 +501,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: int8(num["BP"]),
linkreg: -1, // not used
})
}
......@@ -588,5 +588,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: int8(num["BP"]),
linkreg: -1, // not used
})
}
......@@ -530,5 +530,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: -1, // not used
linkreg: -1, // not used
})
}
......@@ -526,5 +526,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: -1, // not used
linkreg: -1, // not used
})
}
......@@ -376,5 +376,6 @@ func init() {
fpregmask: fp,
specialregmask: hi | lo,
framepointerreg: -1, // not used
linkreg: -1, // not used
})
}
......@@ -393,5 +393,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: int8(num["SP"]),
linkreg: -1, // not used
})
}
......@@ -91,7 +91,7 @@ func init() {
r0 = buildReg("R0")
// R10 and R11 are reserved by the assembler.
gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12")
gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14")
gpsp = gp | sp
// R0 is considered to contain the value 0 in address calculations.
......@@ -547,5 +547,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: -1, // not used
linkreg: int8(num["R14"]),
})
}
......@@ -31,6 +31,7 @@ type arch struct {
fpregmask regMask
specialregmask regMask
framepointerreg int8
linkreg int8
generic bool
}
......@@ -295,6 +296,7 @@ func genOp() {
fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask)
fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg)
}
// gofmt result
......
This diff is collapsed.
......@@ -460,6 +460,18 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line
return c
}
// isLeaf reports whether f is a leaf function, i.e. whether none of the
// values in any of its blocks has an opcode marked as a call in opcodeTable.
// It returns false as soon as the first call is found.
func isLeaf(f *Func) bool {
	for _, blk := range f.Blocks {
		for i := range blk.Values {
			if opcodeTable[blk.Values[i].Op].call {
				// A single call is enough to make f a non-leaf function.
				return false
			}
		}
	}
	return true
}
func (s *regAllocState) init(f *Func) {
s.f = f
s.registers = f.Config.registers
......@@ -510,6 +522,12 @@ func (s *regAllocState) init(f *Func) {
s.allocatable &^= 1 << 12 // R12
}
}
if s.f.Config.LinkReg != -1 {
if isLeaf(f) {
// Leaf functions don't save/restore the link register.
s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
}
}
if s.f.Config.ctxt.Flag_dynlink {
switch s.f.Config.arch {
case "amd64":
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment