Commit c03ed491 authored by Keith Randall's avatar Keith Randall

cmd/compile: load some live values into registers before loop

If we're about to enter a loop, load values which are live
and will soon be used in the loop into registers.

name                     old time/op    new time/op    delta
BinaryTree17-8              2.80s ± 4%     2.62s ± 2%   -6.43%          (p=0.008 n=5+5)
Fannkuch11-8                2.45s ± 2%     2.14s ± 1%  -12.43%          (p=0.008 n=5+5)
FmtFprintfEmpty-8          49.0ns ± 1%    48.4ns ± 1%   -1.35%          (p=0.032 n=5+5)
FmtFprintfString-8          160ns ± 1%     153ns ± 0%   -4.63%          (p=0.008 n=5+5)
FmtFprintfInt-8             152ns ± 0%     150ns ± 0%   -1.57%          (p=0.000 n=5+4)
FmtFprintfIntInt-8          252ns ± 2%     244ns ± 1%   -3.02%          (p=0.008 n=5+5)
FmtFprintfPrefixedInt-8     223ns ± 0%     223ns ± 0%     ~     (all samples are equal)
FmtFprintfFloat-8           293ns ± 2%     291ns ± 2%     ~             (p=0.389 n=5+5)
FmtManyArgs-8               956ns ± 0%     936ns ± 0%   -2.05%          (p=0.008 n=5+5)
GobDecode-8                7.18ms ± 0%    7.11ms ± 0%   -1.02%          (p=0.008 n=5+5)
GobEncode-8                6.12ms ± 3%    6.07ms ± 1%     ~             (p=0.690 n=5+5)
Gzip-8                      284ms ± 1%     284ms ± 0%     ~             (p=1.000 n=5+5)
Gunzip-8                   40.8ms ± 1%    40.6ms ± 1%     ~             (p=0.310 n=5+5)
HTTPClientServer-8         69.8µs ± 1%    72.2µs ± 4%     ~             (p=0.056 n=5+5)
JSONEncode-8               16.1ms ± 2%    16.2ms ± 1%     ~             (p=0.151 n=5+5)
JSONDecode-8               54.9ms ± 0%    57.0ms ± 1%   +3.79%          (p=0.008 n=5+5)
Mandelbrot200-8            4.35ms ± 0%    4.39ms ± 0%   +0.85%          (p=0.008 n=5+5)
GoParse-8                  3.56ms ± 1%    3.42ms ± 1%   -4.03%          (p=0.008 n=5+5)
RegexpMatchEasy0_32-8      75.6ns ± 1%    75.0ns ± 0%   -0.83%          (p=0.016 n=5+4)
RegexpMatchEasy0_1K-8       250ns ± 0%     252ns ± 1%   +0.80%          (p=0.016 n=4+5)
RegexpMatchEasy1_32-8      75.0ns ± 0%    75.4ns ± 2%     ~             (p=0.206 n=5+5)
RegexpMatchEasy1_1K-8       401ns ± 0%     398ns ± 1%     ~             (p=0.056 n=5+5)
RegexpMatchMedium_32-8      119ns ± 0%     118ns ± 0%   -0.84%          (p=0.008 n=5+5)
RegexpMatchMedium_1K-8     36.6µs ± 0%    36.9µs ± 0%   +0.91%          (p=0.008 n=5+5)
RegexpMatchHard_32-8       1.95µs ± 1%    1.92µs ± 0%   -1.23%          (p=0.032 n=5+5)
RegexpMatchHard_1K-8       58.3µs ± 1%    58.1µs ± 1%     ~             (p=0.548 n=5+5)
Revcomp-8                   425ms ± 1%     389ms ± 1%   -8.39%          (p=0.008 n=5+5)
Template-8                 65.5ms ± 1%    63.6ms ± 1%   -2.86%          (p=0.008 n=5+5)
TimeParse-8                 363ns ± 0%     354ns ± 1%   -2.59%          (p=0.008 n=5+5)
TimeFormat-8                363ns ± 0%     364ns ± 1%     ~             (p=0.159 n=5+5)

Fixes #14511

Change-Id: I1b79d2545271fa90d5b04712cc25573bdc94f2ce
Reviewed-on: https://go-review.googlesource.com/20151
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarDavid Chase <drchase@google.com>
parent 5be961a3
...@@ -5081,8 +5081,10 @@ func moveByType(t ssa.Type) int { ...@@ -5081,8 +5081,10 @@ func moveByType(t ssa.Type) int {
return x86.AMOVL return x86.AMOVL
case 8: case 8:
return x86.AMOVQ return x86.AMOVQ
case 16:
return x86.AMOVUPS // int128s are in SSE registers
default: default:
panic("bad int register width") panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
} }
} }
panic("bad register type") panic("bad register type")
......
...@@ -102,6 +102,14 @@ import ( ...@@ -102,6 +102,14 @@ import (
const regDebug = false // TODO: compiler flag const regDebug = false // TODO: compiler flag
const logSpills = false const logSpills = false
// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
const (
likelyDistance = 1
normalDistance = 10
unlikelyDistance = 100
)
// regalloc performs register allocation on f. It sets f.RegAlloc // regalloc performs register allocation on f. It sets f.RegAlloc
// to the resulting allocation. // to the resulting allocation.
func regalloc(f *Func) { func regalloc(f *Func) {
...@@ -550,7 +558,7 @@ func (s *regAllocState) setState(regs []endReg) { ...@@ -550,7 +558,7 @@ func (s *regAllocState) setState(regs []endReg) {
// compatRegs returns the set of registers which can store a type t. // compatRegs returns the set of registers which can store a type t.
func (s *regAllocState) compatRegs(t Type) regMask { func (s *regAllocState) compatRegs(t Type) regMask {
var m regMask var m regMask
if t.IsFloat() { if t.IsFloat() || t == TypeInt128 {
m = 0xffff << 16 // X0-X15 m = 0xffff << 16 // X0-X15
} else { } else {
m = 0xffef << 0 // AX-R15, except SP m = 0xffef << 0 // AX-R15, except SP
...@@ -576,8 +584,12 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -576,8 +584,12 @@ func (s *regAllocState) regalloc(f *Func) {
// Initialize liveSet and uses fields for this block. // Initialize liveSet and uses fields for this block.
// Walk backwards through the block doing liveness analysis. // Walk backwards through the block doing liveness analysis.
liveSet.clear() liveSet.clear()
d := int32(len(b.Values))
if b.Kind == BlockCall {
d += unlikelyDistance
}
for _, e := range s.live[b.ID] { for _, e := range s.live[b.ID] {
s.addUse(e.ID, int32(len(b.Values))+e.dist) // pseudo-uses from beyond end of block s.addUse(e.ID, d+e.dist) // pseudo-uses from beyond end of block
liveSet.add(e.ID) liveSet.add(e.ID)
} }
if v := b.Control; v != nil && s.values[v.ID].needReg { if v := b.Control; v != nil && s.values[v.ID].needReg {
...@@ -944,11 +956,11 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -944,11 +956,11 @@ func (s *regAllocState) regalloc(f *Func) {
} }
} }
// Load control value into reg.
if v := b.Control; v != nil && s.values[v.ID].needReg { if v := b.Control; v != nil && s.values[v.ID].needReg {
if regDebug { if regDebug {
fmt.Printf(" processing control %s\n", v.LongString()) fmt.Printf(" processing control %s\n", v.LongString())
} }
// Load control value into reg.
// TODO: regspec for block control values, instead of using // TODO: regspec for block control values, instead of using
// register set from the control op's output. // register set from the control op's output.
s.allocValToReg(v, opcodeTable[v.Op].reg.outputs[0], false, b.Line) s.allocValToReg(v, opcodeTable[v.Op].reg.outputs[0], false, b.Line)
...@@ -963,6 +975,53 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -963,6 +975,53 @@ func (s *regAllocState) regalloc(f *Func) {
s.freeUseRecords = u s.freeUseRecords = u
} }
// If we are approaching a merge point and we are the primary
// predecessor of it, find live values that we use soon after
// the merge point and promote them to registers now.
if len(b.Succs) == 1 && len(b.Succs[0].Preds) > 1 && b.Succs[0].Preds[s.primary[b.Succs[0].ID]] == b {
// For this to be worthwhile, the loop must have no calls in it.
// Use a very simple loop detector. TODO: incorporate David's loop stuff
// once it is in.
top := b.Succs[0]
for _, p := range top.Preds {
if p == b {
continue
}
for {
if p.Kind == BlockCall {
goto badloop
}
if p == top {
break
}
if len(p.Preds) != 1 {
goto badloop
}
p = p.Preds[0]
}
}
// TODO: sort by distance, pick the closest ones?
for _, live := range s.live[b.ID] {
if live.dist >= unlikelyDistance {
// Don't preload anything live after the loop.
continue
}
vid := live.ID
vi := &s.values[vid]
if vi.regs != 0 {
continue
}
v := s.orig[vid]
m := s.compatRegs(v.Type) &^ s.used
if m != 0 {
s.allocValToReg(v, m, false, b.Line)
}
}
}
badloop:
;
// Save end-of-block register state. // Save end-of-block register state.
// First count how many, this cuts allocations in half. // First count how many, this cuts allocations in half.
k := 0 k := 0
...@@ -1539,8 +1598,14 @@ func (s *regAllocState) computeLive() { ...@@ -1539,8 +1598,14 @@ func (s *regAllocState) computeLive() {
// Add len(b.Values) to adjust from end-of-block distance // Add len(b.Values) to adjust from end-of-block distance
// to beginning-of-block distance. // to beginning-of-block distance.
live.clear() live.clear()
d := int32(len(b.Values))
if b.Kind == BlockCall {
// Because we keep no values in registers across a call,
// make every use past a call very far away.
d += unlikelyDistance
}
for _, e := range s.live[b.ID] { for _, e := range s.live[b.ID] {
live.set(e.ID, e.dist+int32(len(b.Values))) live.set(e.ID, e.dist+d)
} }
// Mark control value as live // Mark control value as live
...@@ -1570,20 +1635,17 @@ func (s *regAllocState) computeLive() { ...@@ -1570,20 +1635,17 @@ func (s *regAllocState) computeLive() {
// invariant: live contains the values live at the start of b (excluding phi inputs) // invariant: live contains the values live at the start of b (excluding phi inputs)
for i, p := range b.Preds { for i, p := range b.Preds {
// Compute additional distance for the edge. // Compute additional distance for the edge.
const normalEdge = 10
const likelyEdge = 1
const unlikelyEdge = 100
// Note: delta must be at least 1 to distinguish the control // Note: delta must be at least 1 to distinguish the control
// value use from the first user in a successor block. // value use from the first user in a successor block.
delta := int32(normalEdge) delta := int32(normalDistance)
if len(p.Succs) == 2 { if len(p.Succs) == 2 {
if p.Succs[0] == b && p.Likely == BranchLikely || if p.Succs[0] == b && p.Likely == BranchLikely ||
p.Succs[1] == b && p.Likely == BranchUnlikely { p.Succs[1] == b && p.Likely == BranchUnlikely {
delta = likelyEdge delta = likelyDistance
} }
if p.Succs[0] == b && p.Likely == BranchUnlikely || if p.Succs[0] == b && p.Likely == BranchUnlikely ||
p.Succs[1] == b && p.Likely == BranchLikely { p.Succs[1] == b && p.Likely == BranchLikely {
delta = unlikelyEdge delta = unlikelyDistance
} }
} }
......
...@@ -46,13 +46,14 @@ type Type interface { ...@@ -46,13 +46,14 @@ type Type interface {
// Special compiler-only types. // Special compiler-only types.
type CompilerType struct { type CompilerType struct {
Name string Name string
size int64
Memory bool Memory bool
Flags bool Flags bool
Void bool Void bool
Int128 bool Int128 bool
} }
func (t *CompilerType) Size() int64 { return 0 } // Size in bytes func (t *CompilerType) Size() int64 { return t.size } // Size in bytes
func (t *CompilerType) Alignment() int64 { return 0 } func (t *CompilerType) Alignment() int64 { return 0 }
func (t *CompilerType) IsBoolean() bool { return false } func (t *CompilerType) IsBoolean() bool { return false }
func (t *CompilerType) IsInteger() bool { return false } func (t *CompilerType) IsInteger() bool { return false }
...@@ -127,5 +128,5 @@ var ( ...@@ -127,5 +128,5 @@ var (
TypeMem = &CompilerType{Name: "mem", Memory: true} TypeMem = &CompilerType{Name: "mem", Memory: true}
TypeFlags = &CompilerType{Name: "flags", Flags: true} TypeFlags = &CompilerType{Name: "flags", Flags: true}
TypeVoid = &CompilerType{Name: "void", Void: true} TypeVoid = &CompilerType{Name: "void", Void: true}
TypeInt128 = &CompilerType{Name: "int128", Int128: true} TypeInt128 = &CompilerType{Name: "int128", size: 16, Int128: true}
) )
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment