Commit 25e0a367 authored by Keith Randall

[dev.ssa] cmd/compile: clean up tuple types and selects

Make tuple types and their SelectX ops fully generic.
These ops no longer need to be lowered.
Regalloc understands them and their tuple-generating arguments.
We can now have opcodes returning arbitrary pairs of results.
(And it would be easy to move to >2 results if needed.)
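
An illustrative sketch of the new shape: a two-result op is a single
tuple-typed SSA value, and the generic Select0/Select1 ops project out
its elements, e.g.

	v = ADDLcarry <(flags, uint32)> x y   // tuple-generating op
	c = Select0 <flags>  v                // element 0: the carry flags
	s = Select1 <uint32> v                // element 1: the 32-bit sum

Regalloc records one register per element for v; see the LocPair type
added below.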

Update arm implementation to the new standard.
Implement just enough in the 386 port to do 64-bit add.

Change-Id: I370ed5aacce219c82e1954c61d1f63af76c16f79
Reviewed-on: https://go-review.googlesource.com/24976
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 6b6de15d
......@@ -278,7 +278,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = r
case ssa.OpARMADDS,
ssa.OpARMSUBS:
r := gc.SSARegNum(v)
r := gc.SSARegNum1(v)
r1 := gc.SSARegNum(v.Args[0])
r2 := gc.SSARegNum(v.Args[1])
p := gc.Prog(v.Op.Asm())
......@@ -351,7 +351,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Offset = v.AuxInt
p.Reg = gc.SSARegNum(v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
p.To.Reg = gc.SSARegNum1(v)
case ssa.OpARMSRRconst:
genshift(arm.AMOVW, 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_RR, v.AuxInt)
case ssa.OpARMADDshiftLL,
......@@ -368,7 +368,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftLL,
ssa.OpARMSUBSshiftLL,
ssa.OpARMRSBSshiftLL:
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LL, v.AuxInt)
p.Scond = arm.C_SBIT
case ssa.OpARMADDshiftRL,
ssa.OpARMADCshiftRL,
......@@ -384,7 +384,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftRL,
ssa.OpARMSUBSshiftRL,
ssa.OpARMRSBSshiftRL:
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LR, v.AuxInt)
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LR, v.AuxInt)
p.Scond = arm.C_SBIT
case ssa.OpARMADDshiftRA,
ssa.OpARMADCshiftRA,
......@@ -400,7 +400,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftRA,
ssa.OpARMSUBSshiftRA,
ssa.OpARMRSBSshiftRA:
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_AR, v.AuxInt)
p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_AR, v.AuxInt)
p.Scond = arm.C_SBIT
case ssa.OpARMMVNshiftLL:
genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)
......@@ -428,7 +428,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftLLreg,
ssa.OpARMSUBSshiftLLreg,
ssa.OpARMRSBSshiftLLreg:
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_LL)
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LL)
p.Scond = arm.C_SBIT
case ssa.OpARMADDshiftRLreg,
ssa.OpARMADCshiftRLreg,
......@@ -444,7 +444,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftRLreg,
ssa.OpARMSUBSshiftRLreg,
ssa.OpARMRSBSshiftRLreg:
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_LR)
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LR)
p.Scond = arm.C_SBIT
case ssa.OpARMADDshiftRAreg,
ssa.OpARMADCshiftRAreg,
......@@ -460,7 +460,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMADDSshiftRAreg,
ssa.OpARMSUBSshiftRAreg,
ssa.OpARMRSBSshiftRAreg:
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_AR)
p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_AR)
p.Scond = arm.C_SBIT
case ssa.OpARMHMUL,
ssa.OpARMHMULU:
......@@ -473,14 +473,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = gc.SSARegNum(v)
p.To.Offset = arm.REGTMP // throw away low 32-bit into tmp register
case ssa.OpARMMULLU:
// 32-bit multiplication, results 64-bit, low 32-bit in reg(v), high 32-bit in R0
// 32-bit multiplication, results 64-bit, high 32-bit in out0, low 32-bit in out1
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = gc.SSARegNum(v.Args[0])
p.Reg = gc.SSARegNum(v.Args[1])
p.To.Type = obj.TYPE_REGREG
p.To.Reg = arm.REG_R0 // high 32-bit
p.To.Offset = int64(gc.SSARegNum(v)) // low 32-bit
p.To.Reg = gc.SSARegNum0(v) // high 32-bit
p.To.Offset = int64(gc.SSARegNum1(v)) // low 32-bit
case ssa.OpARMMULA:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
......@@ -928,9 +928,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Offset = 1
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
case ssa.OpARMCarry,
ssa.OpARMLoweredSelect0,
ssa.OpARMLoweredSelect1:
case ssa.OpSelect0, ssa.OpSelect1:
// nothing to do
case ssa.OpARMLoweredGetClosurePtr:
// Closure pointer is R7 (arm.REGCTXT).
......
......@@ -4266,11 +4266,39 @@ func SSAReg(v *ssa.Value) *ssa.Register {
return reg.(*ssa.Register)
}
// SSAReg0 returns the register to which the first output of v has been allocated.
func SSAReg0(v *ssa.Value) *ssa.Register {
reg := v.Block.Func.RegAlloc[v.ID].(ssa.LocPair)[0]
if reg == nil {
v.Fatalf("nil first register for value: %s\n%s\n", v.LongString(), v.Block.Func)
}
return reg.(*ssa.Register)
}
// SSAReg1 returns the register to which the second output of v has been allocated.
func SSAReg1(v *ssa.Value) *ssa.Register {
reg := v.Block.Func.RegAlloc[v.ID].(ssa.LocPair)[1]
if reg == nil {
v.Fatalf("nil second register for value: %s\n%s\n", v.LongString(), v.Block.Func)
}
return reg.(*ssa.Register)
}
// SSARegNum returns the register number (in cmd/internal/obj numbering) to which v has been allocated.
func SSARegNum(v *ssa.Value) int16 {
return Thearch.SSARegToReg[SSAReg(v).Num]
}
// SSARegNum0 returns the register number (in cmd/internal/obj numbering) to which the first output of v has been allocated.
func SSARegNum0(v *ssa.Value) int16 {
return Thearch.SSARegToReg[SSAReg0(v).Num]
}
// SSARegNum1 returns the register number (in cmd/internal/obj numbering) to which the second output of v has been allocated.
func SSARegNum1(v *ssa.Value) int16 {
return Thearch.SSARegToReg[SSAReg1(v).Num]
}
// CheckLoweredPhi checks that regalloc and stackalloc correctly handled phi values.
// Called during ssaGenValue.
func CheckLoweredPhi(v *ssa.Value) {
......
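A minimal usage sketch for the new helpers (using only the functions
added above): a backend emitting a two-output instruction reads one
register per tuple element, as the ARM MULLU case does for its
high/low halves:

	hi := gc.SSARegNum0(v) // register allocated to output 0
	lo := gc.SSARegNum1(v) // register allocated to output 1

Both index the ssa.LocPair that regalloc stored in RegAlloc[v.ID].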
......@@ -171,10 +171,10 @@ func cse(f *Func) {
if rewrite[v.ID] != nil {
continue
}
if !v.Op.isTupleSelector() {
if v.Op != OpSelect0 && v.Op != OpSelect1 {
continue
}
if !v.Args[0].Op.isTupleGenerator() {
if !v.Args[0].Type.IsTuple() {
f.Fatalf("arg of tuple selector %s is not a tuple: %s", v.String(), v.Args[0].LongString())
}
t := rewrite[v.Args[0].ID]
......
......@@ -10,6 +10,9 @@
(Add32F x y) -> (ADDSS x y)
(Add64F x y) -> (ADDSD x y)
(Add32carry x y) -> (ADDLcarry x y)
(Add32withcarry x y c) -> (ADCL x y c)
(SubPtr x y) -> (SUBL x y)
(Sub32 x y) -> (SUBL x y)
(Sub16 x y) -> (SUBL x y)
......
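A sketch of how these rules compose for the 64-bit add mentioned in the
commit message (approximate generic decomposition, types simplified):

	(Add64 x y) ->
		(Int64Make
			(Add32withcarry (Int64Hi x) (Int64Hi y)
				(Select0 <flags> (Add32carry (Int64Lo x) (Int64Lo y))))
			(Select1 <uint32> (Add32carry (Int64Lo x) (Int64Lo y))))

Select0 of the Add32carry tuple is the carry-out flags (output 0) and
Select1 is the low 32-bit sum (output 1); cse merges the two identical
Add32carry values, so a single ADDLcarry is emitted.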
......@@ -99,6 +99,8 @@ func init() {
gp11nf = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered
gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly, clobbers: flags}
gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{flags, gp}}
gp2carry1 = regInfo{inputs: []regMask{gp, gp, flags}, outputs: gponly}
gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly, clobbers: flags}
gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}, clobbers: flags}
......@@ -171,6 +173,9 @@ func init() {
{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true}, // arg0 + arg1
{name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32"}, // arg0 + auxint
{name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1, generates <carry,result> pair
{name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true}, // arg0+arg1+carry(arg2), where arg2 is flags
{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true}, // arg0 - arg1
{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true}, // arg0 - auxint
......
......@@ -366,10 +366,6 @@
(IsSliceInBounds idx len) -> (LessEqualU (CMP idx len))
// pseudo-ops
(Select0 <t> x) && t.IsFlags() -> (Carry x)
(Select0 <t> x) && !t.IsFlags() -> (LoweredSelect0 x)
(Select1 x) -> (LoweredSelect1 x)
(GetClosurePtr) -> (LoweredGetClosurePtr)
(Convert x mem) -> (MOVWconvert x mem)
......
......@@ -42,8 +42,8 @@ type opData struct {
aux string
rematerializeable bool
argLength int32 // number of arguments, if -1, then this operation has a variable number of arguments
commutative bool // this operation is commutative (e.g. addition)
resultInArg0 bool // v and v.Args[0] must be allocated to the same register
commutative bool // this operation is commutative on its first 2 arguments (e.g. addition)
resultInArg0 bool // last output of v and v.Args[0] must be allocated to the same register
}
type blockData struct {
......@@ -160,11 +160,11 @@ func genOp() {
}
if v.resultInArg0 {
fmt.Fprintln(w, "resultInArg0: true,")
if v.reg.inputs[0] != v.reg.outputs[0] {
log.Fatalf("input[0] and output registers must be equal for %s", v.name)
if v.reg.inputs[0] != v.reg.outputs[len(v.reg.outputs)-1] {
log.Fatalf("input[0] and last output register must be equal for %s", v.name)
}
if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
log.Fatalf("input[1] and output registers must be equal for %s", v.name)
if v.commutative && v.reg.inputs[1] != v.reg.outputs[len(v.reg.outputs)-1] {
log.Fatalf("input[1] and last output register must be equal for %s", v.name)
}
}
if a.name == "generic" {
......@@ -196,14 +196,24 @@ func genOp() {
}
fmt.Fprintln(w, "},")
}
if v.reg.clobbers > 0 {
fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
}
// reg outputs
if len(v.reg.outputs) > 0 {
fmt.Fprintln(w, "outputs: []regMask{")
for _, r := range v.reg.outputs {
fmt.Fprintf(w, "%d,%s\n", r, a.regMaskComment(r))
s = s[:0]
for i, r := range v.reg.outputs {
if r != 0 {
s = append(s, intPair{countRegs(r), i})
}
}
if len(s) > 0 {
sort.Sort(byKey(s))
fmt.Fprintln(w, "outputs: []outputInfo{")
for _, p := range s {
r := v.reg.outputs[p.val]
fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
}
fmt.Fprintln(w, "},")
}
......
......@@ -359,7 +359,7 @@ func (v *Value) LongHTML() string {
}
r := v.Block.Func.RegAlloc
if int(v.ID) < len(r) && r[v.ID] != nil {
s += " : " + r[v.ID].Name()
s += " : " + html.EscapeString(r[v.ID].Name())
}
s += "</span>"
return s
......
......@@ -36,3 +36,16 @@ func (s LocalSlot) Name() string {
}
return fmt.Sprintf("%s+%d[%s]", s.N, s.Off, s.Type)
}
type LocPair [2]Location
func (t LocPair) Name() string {
n0, n1 := "nil", "nil"
if t[0] != nil {
n0 = t[0].Name()
}
if t[1] != nil {
n1 = t[1].Name()
}
return fmt.Sprintf("<%s,%s>", n0, n1)
}
......@@ -21,7 +21,7 @@ func checkLower(f *Func) {
continue // lowered
}
switch v.Op {
case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill, OpVarLive, OpKeepAlive:
case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill, OpVarLive, OpKeepAlive, OpSelect0, OpSelect1:
continue // ok not to lower
case OpGetG:
if f.Config.hasGReg {
......
......@@ -26,7 +26,7 @@ type opInfo struct {
generic bool // this is a generic (arch-independent) opcode
rematerializeable bool // this op is rematerializeable
commutative bool // this operation is commutative (e.g. addition)
resultInArg0 bool // v and v.Args[0] must be allocated to the same register
resultInArg0 bool // last output of v and v.Args[0] must be allocated to the same register
}
type inputInfo struct {
......@@ -34,10 +34,15 @@ type inputInfo struct {
regs regMask // allowed input registers
}
type outputInfo struct {
idx int // index in output tuple
regs regMask // allowed output registers
}
type regInfo struct {
inputs []inputInfo // ordered in register allocation order
clobbers regMask
outputs []regMask // NOTE: values can only have 1 output for now.
outputs []outputInfo // ordered in register allocation order
}
type auxType int8
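With outputInfo in place, a generated entry for a two-output op such as
ADDLcarry looks roughly like this (masks shown symbolically; the real
generator prints numeric regMask values):

	reg: regInfo{
		inputs: []inputInfo{...},
		outputs: []outputInfo{
			{0, flags}, // output 0: carry, pinned to the flags register
			{1, gp},    // output 1: sum, any general-purpose register
		},
	},

Outputs are sorted so masks with fewer registers come first, which is
the "register allocation order" the field comment refers to.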
......@@ -152,28 +157,3 @@ func MakeSizeAndAlign(size, align int64) SizeAndAlign {
}
return SizeAndAlign(size | align<<56)
}
func (op Op) isTupleGenerator() bool {
switch op {
case OpAdd32carry, OpSub32carry, OpMul32uhilo,
OpARMADDS, OpARMSUBS, OpARMMULLU,
OpARMADDSconst, OpARMSUBSconst, OpARMRSBSconst,
OpARMADDSshiftLL, OpARMSUBSshiftLL, OpARMRSBSshiftLL,
OpARMADDSshiftRL, OpARMSUBSshiftRL, OpARMRSBSshiftRL,
OpARMADDSshiftRA, OpARMSUBSshiftRA, OpARMRSBSshiftRA,
OpARMADDSshiftLLreg, OpARMSUBSshiftLLreg, OpARMRSBSshiftLLreg,
OpARMADDSshiftRLreg, OpARMSUBSshiftRLreg, OpARMRSBSshiftRLreg,
OpARMADDSshiftRAreg, OpARMSUBSshiftRAreg, OpARMRSBSshiftRAreg:
return true
}
return false
}
func (op Op) isTupleSelector() bool {
switch op {
case OpSelect0, OpSelect1,
OpARMLoweredSelect0, OpARMLoweredSelect1, OpARMCarry:
return true
}
return false
}
......@@ -333,10 +333,10 @@ func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
s.f.setHome(c, &s.registers[r])
}
// allocReg chooses a register for v from the set of registers in mask.
// allocReg chooses a register from the set of registers in mask.
// If there is no unused register, a Value will be kicked out of
// a register to make room.
func (s *regAllocState) allocReg(v *Value, mask regMask) register {
func (s *regAllocState) allocReg(mask regMask) register {
mask &= s.allocatable
mask &^= s.nospill
if mask == 0 {
......@@ -401,7 +401,7 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line
}
// Allocate a register.
r := s.allocReg(v, mask)
r := s.allocReg(mask)
// Allocate v to the new register.
var c *Value
......@@ -471,7 +471,7 @@ func (s *regAllocState) init(f *Func) {
}
// Figure out which registers we're allowed to use.
s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.flagRegMask
s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask
s.allocatable &^= 1 << s.SPReg
s.allocatable &^= 1 << s.SBReg
if s.f.Config.hasGReg {
......@@ -499,11 +499,13 @@ func (s *regAllocState) init(f *Func) {
s.orig = make([]*Value, f.NumValues())
for _, b := range f.Blocks {
for _, v := range b.Values {
if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() {
if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
s.values[v.ID].needReg = true
s.values[v.ID].rematerializeable = v.rematerializeable()
s.orig[v.ID] = v
}
// Note: needReg is false for values returning Tuple types.
// Instead, we mark the corresponding Selects as needReg.
}
}
s.computeLive()
......@@ -947,6 +949,7 @@ func (s *regAllocState) regalloc(f *Func) {
if s.f.pass.debug > regDebug {
fmt.Printf(" processing %s\n", v.LongString())
}
regspec := opcodeTable[v.Op].reg
if v.Op == OpPhi {
f.Fatalf("phi %s not at start of block", v)
}
......@@ -962,6 +965,18 @@ func (s *regAllocState) regalloc(f *Func) {
s.advanceUses(v)
continue
}
if v.Op == OpSelect0 || v.Op == OpSelect1 {
if s.values[v.ID].needReg {
var i = 0
if v.Op == OpSelect1 {
i = 1
}
s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).Num), v, v)
}
b.Values = append(b.Values, v)
s.advanceUses(v)
goto issueSpill
}
if v.Op == OpGetG && s.f.Config.hasGReg {
// use hardware g register
if s.regs[s.GReg].v != nil {
......@@ -970,17 +985,7 @@ func (s *regAllocState) regalloc(f *Func) {
s.assignReg(s.GReg, v, v)
b.Values = append(b.Values, v)
s.advanceUses(v)
// spill unconditionally, will be deleted if never used
spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v)
s.setOrig(spill, v)
s.values[v.ID].spill = spill
s.values[v.ID].spillUsed = false
if loop != nil {
loop.spills = append(loop.spills, v)
nSpillsInner++
}
nSpills++
continue
goto issueSpill
}
if v.Op == OpArg {
// Args are "pre-spilled" values. We don't allocate
......@@ -1009,7 +1014,6 @@ func (s *regAllocState) regalloc(f *Func) {
b.Values = append(b.Values, v)
continue
}
regspec := opcodeTable[v.Op].reg
if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
// No register allocation required (or none specified yet)
s.freeRegs(regspec.clobbers)
......@@ -1167,49 +1171,73 @@ func (s *regAllocState) regalloc(f *Func) {
// Dump any registers which will be clobbered
s.freeRegs(regspec.clobbers)
// Pick register for output.
if s.values[v.ID].needReg {
mask := regspec.outputs[0] & s.allocatable
if opcodeTable[v.Op].resultInArg0 {
if !opcodeTable[v.Op].commutative {
// Output must use the same register as input 0.
r := register(s.f.getHome(args[0].ID).(*Register).Num)
mask = regMask(1) << r
} else {
// Output must use the same register as input 0 or 1.
r0 := register(s.f.getHome(args[0].ID).(*Register).Num)
r1 := register(s.f.getHome(args[1].ID).(*Register).Num)
// Check r0 and r1 for desired output register.
found := false
for _, r := range dinfo[idx].out {
if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
mask = regMask(1) << r
found = true
if r == r1 {
args[0], args[1] = args[1], args[0]
// Pick registers for outputs.
{
outRegs := [2]register{noRegister, noRegister}
var used regMask
for _, out := range regspec.outputs {
mask := out.regs & s.allocatable &^ used
if mask == 0 {
continue
}
if opcodeTable[v.Op].resultInArg0 && out.idx == len(regspec.outputs)-1 {
if !opcodeTable[v.Op].commutative {
// Output must use the same register as input 0.
r := register(s.f.getHome(args[0].ID).(*Register).Num)
mask = regMask(1) << r
} else {
// Output must use the same register as input 0 or 1.
r0 := register(s.f.getHome(args[0].ID).(*Register).Num)
r1 := register(s.f.getHome(args[1].ID).(*Register).Num)
// Check r0 and r1 for desired output register.
found := false
for _, r := range dinfo[idx].out {
if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
mask = regMask(1) << r
found = true
if r == r1 {
args[0], args[1] = args[1], args[0]
}
break
}
break
}
if !found {
// Neither are desired, pick r0.
mask = regMask(1) << r0
}
}
if !found {
// Neither are desired, pick r0.
mask = regMask(1) << r0
}
for _, r := range dinfo[idx].out {
if r != noRegister && (mask&^s.used)>>r&1 != 0 {
// Desired register is allowed and unused.
mask = regMask(1) << r
break
}
}
}
for _, r := range dinfo[idx].out {
if r != noRegister && (mask&^s.used)>>r&1 != 0 {
// Desired register is allowed and unused.
mask = regMask(1) << r
break
// Avoid registers we're saving for other values.
if mask&^desired.avoid != 0 {
mask &^= desired.avoid
}
r := s.allocReg(mask)
outRegs[out.idx] = r
used |= regMask(1) << r
}
// Record register choices
if v.Type.IsTuple() {
var outLocs LocPair
if r := outRegs[0]; r != noRegister {
outLocs[0] = &s.registers[r]
}
if r := outRegs[1]; r != noRegister {
outLocs[1] = &s.registers[r]
}
s.f.setHome(v, outLocs)
// Note that subsequent SelectX instructions will do the assignReg calls.
} else {
if r := outRegs[0]; r != noRegister {
s.assignReg(r, v, v)
}
}
// Avoid registers we're saving for other values.
if mask&^desired.avoid != 0 {
mask &^= desired.avoid
}
r := s.allocReg(v, mask)
s.assignReg(r, v, v)
}
// Issue the Value itself.
......@@ -1228,6 +1256,7 @@ func (s *regAllocState) regalloc(f *Func) {
// f()
// }
// It would be good to have both spill and restore inside the IF.
issueSpill:
if s.values[v.ID].needReg {
spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v)
s.setOrig(spill, v)
......@@ -1246,9 +1275,10 @@ func (s *regAllocState) regalloc(f *Func) {
if s.f.pass.debug > regDebug {
fmt.Printf(" processing control %s\n", v.LongString())
}
// TODO: regspec for block control values, instead of using
// register set from the control op's output.
s.allocValToReg(v, opcodeTable[v.Op].reg.outputs[0], false, b.Line)
// We assume that a control input can be passed in any
// type-compatible register. If this turns out not to be true,
// we'll need to introduce a regspec for a block's control value.
s.allocValToReg(v, s.compatRegs(v.Type), false, b.Line)
// Remove this use from the uses list.
vi := &s.values[v.ID]
u := vi.uses
......@@ -2065,6 +2095,8 @@ func (e *edgeState) findRegFor(typ Type) Location {
return nil
}
// rematerializeable reports whether the register allocator should recompute
// a value instead of spilling/restoring it.
func (v *Value) rematerializeable() bool {
if !opcodeTable[v.Op].rematerializeable {
return false
......
......@@ -22,6 +22,10 @@ func rewriteValue386(v *Value, config *Config) bool {
return rewriteValue386_OpAdd32(v, config)
case OpAdd32F:
return rewriteValue386_OpAdd32F(v, config)
case OpAdd32carry:
return rewriteValue386_OpAdd32carry(v, config)
case OpAdd32withcarry:
return rewriteValue386_OpAdd32withcarry(v, config)
case OpAdd64F:
return rewriteValue386_OpAdd64F(v, config)
case OpAdd8:
......@@ -1116,6 +1120,38 @@ func rewriteValue386_OpAdd32F(v *Value, config *Config) bool {
return true
}
}
func rewriteValue386_OpAdd32carry(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Add32carry x y)
// cond:
// result: (ADDLcarry x y)
for {
x := v.Args[0]
y := v.Args[1]
v.reset(Op386ADDLcarry)
v.AddArg(x)
v.AddArg(y)
return true
}
}
func rewriteValue386_OpAdd32withcarry(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Add32withcarry x y c)
// cond:
// result: (ADCL x y c)
for {
x := v.Args[0]
y := v.Args[1]
c := v.Args[2]
v.reset(Op386ADCL)
v.AddArg(x)
v.AddArg(y)
v.AddArg(c)
return true
}
}
func rewriteValue386_OpAdd64F(v *Value, config *Config) bool {
b := v.Block
_ = b
......
......@@ -654,10 +654,6 @@ func rewriteValueARM(v *Value, config *Config) bool {
return rewriteValueARM_OpARMSUBshiftRL(v, config)
case OpARMSUBshiftRLreg:
return rewriteValueARM_OpARMSUBshiftRLreg(v, config)
case OpSelect0:
return rewriteValueARM_OpSelect0(v, config)
case OpSelect1:
return rewriteValueARM_OpSelect1(v, config)
case OpSignExt16to32:
return rewriteValueARM_OpSignExt16to32(v, config)
case OpSignExt8to16:
......@@ -15554,50 +15550,6 @@ func rewriteValueARM_OpARMSUBshiftRLreg(v *Value, config *Config) bool {
}
return false
}
func rewriteValueARM_OpSelect0(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Select0 <t> x)
// cond: t.IsFlags()
// result: (Carry x)
for {
t := v.Type
x := v.Args[0]
if !(t.IsFlags()) {
break
}
v.reset(OpARMCarry)
v.AddArg(x)
return true
}
// match: (Select0 <t> x)
// cond: !t.IsFlags()
// result: (LoweredSelect0 x)
for {
t := v.Type
x := v.Args[0]
if !(!t.IsFlags()) {
break
}
v.reset(OpARMLoweredSelect0)
v.AddArg(x)
return true
}
return false
}
func rewriteValueARM_OpSelect1(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Select1 x)
// cond:
// result: (LoweredSelect1 x)
for {
x := v.Args[0]
v.reset(OpARMLoweredSelect1)
v.AddArg(x)
return true
}
}
func rewriteValueARM_OpSignExt16to32(v *Value, config *Config) bool {
b := v.Block
_ = b
......
......@@ -45,21 +45,6 @@ func (h ValHeap) Less(i, j int) bool {
if c := sx - sy; c != 0 {
return c > 0 // higher score comes later.
}
if sx == ScoreReadTuple {
// both are tuple-reading ops
// if they read same tuple, flag-reading op comes earlier
if x.Args[0] == y.Args[0] {
if x.Op == OpARMCarry || x.Op == OpARMLoweredSelect0 { //TODO: abstract this condition?
return false
} else {
return true
}
}
// if they read different tuples, order them as
// tuple-generating order to avoid interleaving
x = x.Args[0]
y = y.Args[0]
}
if x.Line != y.Line { // Favor in-order line stepping
return x.Line > y.Line
}
......@@ -119,7 +104,7 @@ func schedule(f *Func) {
// reduce register pressure. It also helps make sure
// VARDEF ops are scheduled before the corresponding LEA.
score[v.ID] = ScoreMemory
case v.Op == OpARMCarry || v.Op == OpARMLoweredSelect0 || v.Op == OpARMLoweredSelect1:
case v.Op == OpSelect0 || v.Op == OpSelect1:
// Schedule the pseudo-op of reading part of a tuple
// immediately after the tuple-generating op, since
// this value is already live. This also removes its
......@@ -226,12 +211,12 @@ func schedule(f *Func) {
// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
//TODO: maybe remove ReadTuple score above, if it does not help on performance
switch {
case v.Op == OpARMCarry || v.Op == OpARMLoweredSelect0:
case v.Op == OpSelect0:
if tuples[v.Args[0].ID] == nil {
tuples[v.Args[0].ID] = make([]*Value, 2)
}
tuples[v.Args[0].ID][0] = v
case v.Op == OpARMLoweredSelect1:
case v.Op == OpSelect1:
if tuples[v.Args[0].ID] == nil {
tuples[v.Args[0].ID] = make([]*Value, 2)
}
......
......@@ -64,7 +64,7 @@ func tighten(f *Func) {
continue
default:
}
if v.Op.isTupleSelector() {
if v.Op == OpSelect0 || v.Op == OpSelect1 {
// tuple selector must stay with tuple generator
continue
}
......
......@@ -165,6 +165,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
}
// 2-address opcode arithmetic
case ssa.Op386SUBL,
ssa.Op386MULL,
......@@ -176,13 +177,22 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
ssa.Op386PXOR:
ssa.Op386PXOR,
ssa.Op386ADCL:
r := gc.SSARegNum(v)
if r != gc.SSARegNum(v.Args[0]) {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))
case ssa.Op386ADDLcarry:
// output 0 is carry, output 1 is the low 32 bits.
r := gc.SSARegNum1(v)
if r != gc.SSARegNum(v.Args[0]) {
v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
}
opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))
case ssa.Op386DIVL, ssa.Op386DIVW,
ssa.Op386DIVLU, ssa.Op386DIVWU,
ssa.Op386MODL, ssa.Op386MODW,
......@@ -716,7 +726,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Reg = gc.SSARegNum(v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
case ssa.OpSP, ssa.OpSB:
case ssa.OpSP, ssa.OpSB, ssa.OpSelect0, ssa.OpSelect1:
// nothing to do
case ssa.Op386SETEQ, ssa.Op386SETNE,
ssa.Op386SETL, ssa.Op386SETLE,
......