Commit 247786c1 authored by Keith Randall

[dev.ssa] cmd/internal/ssa: Cleanup & reorg

Rename ops like ADDCQ to ADDQconst, so it is clear what the base opcode is and what
the modifiers are.

Convert FP references to SP references once we know the frame size. Relatedly, compute
the frame size in the ssa package.
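A worked example of the conversion (editor's sketch, not code from this CL): within a function body the virtual FP sits exactly FrameSize bytes above SP, so an FP-relative access is rewritten by adding the frame size to its offset and switching the base register.

```go
// Hypothetical helper showing the arithmetic that stackalloc now
// performs on each FP-based value: FP == SP + frameSize, so
// (MOVQload [c] (FP) mem) becomes (MOVQload [c+frameSize] (SP) mem).
func fpToSPOffset(c, frameSize int64) int64 {
	return c + frameSize
}
```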

Do a bunch of small fixes.

Add a TODO list for people to peruse.

Change-Id: Ia6a3fe2bf57e5a2e5e883032e2a2a3fdd566c038
Reviewed-on: https://go-review.googlesource.com/10465
Reviewed-by: Alan Donovan <adonovan@google.com>
parent cfc2aa56
@@ -287,6 +287,14 @@ func (s *state) expr(n *Node) *ssa.Value {
 		a := s.expr(n.Left)
 		b := s.expr(n.Right)
 		return s.curBlock.NewValue2(ssa.OpSub, a.Type, nil, a, b)
+	case OLSH:
+		a := s.expr(n.Left)
+		b := s.expr(n.Right)
+		return s.curBlock.NewValue2(ssa.OpLsh, a.Type, nil, a, b)
+	case ORSH:
+		a := s.expr(n.Left)
+		b := s.expr(n.Right)
+		return s.curBlock.NewValue2(ssa.OpRsh, a.Type, nil, a, b)
 	case OADDR:
 		return s.addr(n.Left)
@@ -519,25 +527,15 @@ type branch struct {
 // gcargs and gclocals are filled in with pointer maps for the frame.
 func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
 	// TODO: line numbers
-	// TODO: layout frame
-	stkSize := int64(64)
-	if Hasdefer != 0 {
-		// deferreturn pretends to have one uintptr argument.
-		// Reserve space for it so stack scanner is happy.
-		if Maxarg < int64(Widthptr) {
-			Maxarg = int64(Widthptr)
-		}
-	}
-	if stkSize+Maxarg > 1<<31 {
+	if f.FrameSize > 1<<31 {
 		Yyerror("stack frame too large (>2GB)")
 		return
 	}
-	frameSize := stkSize + Maxarg
 
 	ptxt.To.Type = obj.TYPE_TEXTSIZE
 	ptxt.To.Val = int32(Rnd(Curfn.Type.Argwid, int64(Widthptr))) // arg size
-	ptxt.To.Offset = frameSize - 8 // TODO: arch-dependent
+	ptxt.To.Offset = f.FrameSize - 8 // TODO: arch-dependent
 
 	// Remember where each block starts.
 	bstart := make([]*obj.Prog, f.NumBlocks())
@@ -551,7 +549,7 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
 		bstart[b.ID] = Pc
 		// Emit values in block
 		for _, v := range b.Values {
-			genValue(v, frameSize)
+			genValue(v)
 		}
 		// Emit control flow instructions for block
 		var next *ssa.Block
@@ -578,7 +576,7 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
 	liveness(Curfn, ptxt, gcargs, gclocals)
 }
 
-func genValue(v *ssa.Value, frameSize int64) {
+func genValue(v *ssa.Value) {
 	switch v.Op {
 	case ssa.OpADDQ:
 		// TODO: use addq instead of leaq if target is in the right register.
@@ -589,7 +587,7 @@ func genValue(v *ssa.Value, frameSize int64) {
 		p.From.Index = regnum(v.Args[1])
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = regnum(v)
-	case ssa.OpADDCQ:
+	case ssa.OpADDQconst:
 		// TODO: use addq instead of leaq if target is in the right register.
 		p := Prog(x86.ALEAQ)
 		p.From.Type = obj.TYPE_MEM
@@ -597,7 +595,17 @@ func genValue(v *ssa.Value, frameSize int64) {
 		p.From.Offset = v.Aux.(int64)
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = regnum(v)
-	case ssa.OpSUBCQ:
+	case ssa.OpMULQconst:
+		// TODO: this isn't right. doasm fails on it. I don't think obj
+		// has ever been taught to compile imul $c, r1, r2.
+		p := Prog(x86.AIMULQ)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.Aux.(int64)
+		p.From3.Type = obj.TYPE_REG
+		p.From3.Reg = regnum(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpSUBQconst:
 		// This code compensates for the fact that the register allocator
 		// doesn't understand 2-address instructions yet. TODO: fix that.
 		x := regnum(v.Args[0])
@@ -615,13 +623,38 @@ func genValue(v *ssa.Value, frameSize int64) {
 		p.From.Offset = v.Aux.(int64)
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = r
+	case ssa.OpSHLQconst:
+		x := regnum(v.Args[0])
+		r := regnum(v)
+		if x != r {
+			p := Prog(x86.AMOVQ)
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = x
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = r
+			x = r
+		}
+		p := Prog(x86.ASHLQ)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = r
+	case ssa.OpLEAQ:
+		p := Prog(x86.ALEAQ)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = regnum(v.Args[0])
+		p.From.Scale = 1
+		p.From.Index = regnum(v.Args[1])
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
 	case ssa.OpCMPQ:
 		p := Prog(x86.ACMPQ)
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = regnum(v.Args[0])
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = regnum(v.Args[1])
-	case ssa.OpCMPCQ:
+	case ssa.OpCMPQconst:
 		p := Prog(x86.ACMPQ)
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = regnum(v.Args[0])
@@ -643,38 +676,22 @@ func genValue(v *ssa.Value, frameSize int64) {
 	case ssa.OpMOVQload:
 		p := Prog(x86.AMOVQ)
 		p.From.Type = obj.TYPE_MEM
-		if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" {
-			// TODO: do the fp/sp adjustment somewhere else?
-			p.From.Reg = x86.REG_SP
-			p.From.Offset = v.Aux.(int64) + frameSize
-		} else {
 		p.From.Reg = regnum(v.Args[0])
 		p.From.Offset = v.Aux.(int64)
-		}
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = regnum(v)
 	case ssa.OpMOVBload:
 		p := Prog(x86.AMOVB)
 		p.From.Type = obj.TYPE_MEM
-		if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" {
-			p.From.Reg = x86.REG_SP
-			p.From.Offset = v.Aux.(int64) + frameSize
-		} else {
 		p.From.Reg = regnum(v.Args[0])
 		p.From.Offset = v.Aux.(int64)
-		}
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = regnum(v)
 	case ssa.OpMOVQloadidx8:
 		p := Prog(x86.AMOVQ)
 		p.From.Type = obj.TYPE_MEM
-		if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" {
-			p.From.Reg = x86.REG_SP
-			p.From.Offset = v.Aux.(int64) + frameSize
-		} else {
 		p.From.Reg = regnum(v.Args[0])
 		p.From.Offset = v.Aux.(int64)
-		}
 		p.From.Scale = 8
 		p.From.Index = regnum(v.Args[1])
 		p.To.Type = obj.TYPE_REG
@@ -684,13 +701,8 @@ func genValue(v *ssa.Value, frameSize int64) {
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = regnum(v.Args[1])
 		p.To.Type = obj.TYPE_MEM
-		if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" {
-			p.To.Reg = x86.REG_SP
-			p.To.Offset = v.Aux.(int64) + frameSize
-		} else {
 		p.To.Reg = regnum(v.Args[0])
 		p.To.Offset = v.Aux.(int64)
-		}
 	case ssa.OpCopy:
 		x := regnum(v.Args[0])
 		y := regnum(v)
@@ -705,7 +717,7 @@ func genValue(v *ssa.Value, frameSize int64) {
 		p := Prog(x86.AMOVQ)
 		p.From.Type = obj.TYPE_MEM
 		p.From.Reg = x86.REG_SP
-		p.From.Offset = frameSize - localOffset(v.Args[0])
+		p.From.Offset = localOffset(v.Args[0])
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = regnum(v)
 	case ssa.OpStoreReg8:
@@ -714,7 +726,7 @@ func genValue(v *ssa.Value, frameSize int64) {
 		p.From.Reg = regnum(v.Args[0])
 		p.To.Type = obj.TYPE_MEM
 		p.To.Reg = x86.REG_SP
-		p.To.Offset = frameSize - localOffset(v)
+		p.To.Offset = localOffset(v)
 	case ssa.OpPhi:
 		// just check to make sure regalloc did it right
 		f := v.Block.Func
@@ -740,10 +752,15 @@ func genValue(v *ssa.Value, frameSize int64) {
 		p.From.Offset = g.Offset
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = regnum(v)
+	case ssa.OpStaticCall:
+		p := Prog(obj.ACALL)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = Linksym(v.Aux.(*Sym))
 	case ssa.OpFP, ssa.OpSP:
 		// nothing to do
 	default:
-		log.Fatalf("value %s not implemented yet", v.LongString())
+		log.Fatalf("value %s not implemented", v.LongString())
 	}
 }
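The OpSHLQconst case above is worth a gloss: x86 shifts are two-address (the destination register is also an input), so when the allocator assigns the input and the result to different registers, the input must first be copied into the result register. A minimal sketch of the pattern, with hypothetical emit helpers standing in for Prog:

```go
// emitMove and emitShlConst are assumed stand-ins for emitting
// "MOVQ x, r" and "SHLQ $c, r". Copying first makes the 3-address
// value v = x << c expressible with a 2-address instruction.
func shiftConstTwoAddr(emitMove func(src, dst int16), emitShlConst func(c int64, dst int16), x, r int16, c int64) {
	if x != r {
		emitMove(x, r) // r = x
	}
	emitShlConst(c, r) // r <<= c
}
```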
@@ -757,6 +774,12 @@ func genBlock(b, next *ssa.Block, branches []branch) []branch {
 		}
 	case ssa.BlockExit:
 		Prog(obj.ARET)
+	case ssa.BlockCall:
+		if b.Succs[0] != next {
+			p := Prog(obj.AJMP)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+		}
 	case ssa.BlockEQ:
 		if b.Succs[0] == next {
 			p := Prog(x86.AJNE)
@@ -844,7 +867,7 @@ func genBlock(b, next *ssa.Block, branches []branch) []branch {
 		}
 	default:
-		log.Fatalf("branch %s not implemented yet", b.LongString())
+		log.Fatalf("branch %s not implemented", b.LongString())
 	}
 	return branches
 }
......
This is a list of things that need to be worked on. It is by no means complete.

Allocation
- Allocation of decls in stackalloc. Decls survive if they are
  addrtaken or are too large for registerization.

Scheduling
- Make sure loads are scheduled correctly with respect to stores.
  Same for flag type values. We can't have more than one value of
  mem or flag types live at once.
- Reduce register pressure. Schedule instructions which kill
  variables first.

Values
- Add a line number field. Figure out how to populate it and
  maintain it during rewrites.
- Store *Type instead of Type? Keep an array of used Types in Func
  and reference by id? Unify with the types in ../gc so we just use a
  pointer instead of an interface?
- Recycle dead values instead of using GC to do that.
- A lot of Aux fields are just int64. Add a separate AuxInt field?
  If not that, then cache the interfaces that wrap int64s.
- OpStore uses 3 args. Increase the size of argstorage to 3?

Opcodes
- Rename ops to prevent cross-arch conflicts. MOVQ -> MOVQamd64 (or
  MOVQ6?). Other option: build opcode table in Config instead of globally.
- Remove asm string from opinfo, no longer needed.
- It's annoying to list the opcode both in the opcode list and an
  opInfo map entry. Specify it in one place and use go:generate to
  produce both?

Regalloc
- Make less arch-dependent.
- Don't spill everything at every basic block boundary.
- Allow args and return values to be ssa-able.
- Handle 2-address instructions.

Rewrites
- Strength reduction (both arch-indep and arch-dependent?)
- Code sequence for shifts >= wordsize
- Start another architecture (arm?)

Common-Subexpression Elimination
- Make a better decision about which value in an equivalence class we
  should choose to replace other values in that class.
- Can we move control values out of their basic block?
@@ -17,6 +17,8 @@ type Func struct {
 	// when register allocation is done, maps value ids to locations
 	RegAlloc []Location
+
+	// when stackalloc is done, the size of the stack frame
+	FrameSize int64
 }
 
 // NumBlocks returns an integer larger than the id of any Block in the Func.
......
@@ -26,19 +26,9 @@ func (r *Register) Name() string {
 // A LocalSlot is a location in the stack frame.
 type LocalSlot struct {
-	Idx int64 // offset in locals area (distance down from FP == caller's SP)
+	Idx int64 // offset in locals area (distance up from SP)
 }
 
 func (s *LocalSlot) Name() string {
-	return fmt.Sprintf("-%d(FP)", s.Idx)
-}
-
-// An ArgSlot is a location in the parents' stack frame where it passed us an argument.
-type ArgSlot struct {
-	idx int64 // offset in argument area
-}
-
-// A CalleeSlot is a location in the stack frame where we pass an argument to a callee.
-type CalleeSlot struct {
-	idx int64 // offset in callee area
+	return fmt.Sprintf("%d(SP)", s.Idx)
 }
@@ -4,6 +4,8 @@
 package ssa
 
+import "log"
+
 //go:generate go run rulegen/rulegen.go rulegen/lower_amd64.rules lowerAmd64 lowerAmd64.go
 
 // convert to machine-dependent ops
@@ -11,7 +13,14 @@ func lower(f *Func) {
 	// repeat rewrites until we find no more rewrites
 	applyRewrite(f, f.Config.lower)
 
-	// TODO: check for unlowered opcodes, fail if we find one
+	// Check for unlowered opcodes, fail if we find one.
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op < OpGenericEnd && v.Op != OpFP && v.Op != OpSP && v.Op != OpArg && v.Op != OpCopy && v.Op != OpPhi {
+				log.Panicf("%s not lowered", v.LongString())
+			}
+		}
+	}
 
 	// additional pass for 386/amd64, link condition codes directly to blocks
 	// TODO: do generically somehow? Special "block" rewrite rules?
......
This diff is collapsed.
@@ -34,6 +34,8 @@ const (
 	OpAdd // arg0 + arg1
 	OpSub // arg0 - arg1
 	OpMul // arg0 * arg1
+	OpLsh // arg0 << arg1
+	OpRsh // arg0 >> arg1 (signed/unsigned depending on signedness of type)
 
 	// 2-input comparisons
 	OpLess // arg0 < arg1
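A note on the OpRsh comment: like Go itself, the generic op encodes arithmetic versus logical right shift in the operand's type rather than in the opcode. A standalone example of the difference (editor's illustration):

```go
package main

import "fmt"

func main() {
	var s int8 = -8    // bits 11111000
	var u uint8 = 0xF8 // same bit pattern, unsigned
	fmt.Println(s >> 1) // -4: arithmetic shift, sign bit replicated
	fmt.Println(u >> 1) // 124: logical shift, zero-filled
}
```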
@@ -83,10 +85,6 @@ const (
 	OpOffPtr // arg0 + aux.(int64) (arg0 and result are pointers)
 
-	// These ops return a pointer to a location on the stack.
-	OpFPAddr // FP + aux.(int64) (+ == args from caller, - == locals)
-	OpSPAddr // SP + aux.(int64)
-
 	// spill&restore ops for the register allocator. These are
 	// semantically identical to OpCopy; they do not take/return
 	// stores like regular memory ops do. We can get away without memory
@@ -96,6 +94,8 @@ const (
 	// used during ssa construction. Like OpCopy, but the arg has not been specified yet.
 	OpFwdRef
 
+	OpGenericEnd
 )
 
 // GlobalOffset represents a fixed offset within a global variable
......
@@ -6,16 +6,16 @@ import "fmt"
 const (
 	_Op_name_0 = "opInvalid"
-	_Op_name_1 = "opGenericBaseOpAddOpSubOpMulOpLessOpConstOpArgOpGlobalOpFuncOpFPOpSPOpCopyOpMoveOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpLoadOpStoreOpArrayIndexOpPtrIndexOpIsNonNilOpIsInBoundsOpCallOpStaticCallOpConvertOpConvNopOpOffPtrOpFPAddrOpSPAddrOpStoreReg8OpLoadReg8OpFwdRef"
-	_Op_name_2 = "opAMD64BaseOpADDQOpSUBQOpADDCQOpSUBCQOpMULQOpMULCQOpSHLQOpSHLCQOpNEGQOpADDLOpCMPQOpCMPCQOpTESTQOpTESTBOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpLEAQglobalOpMOVBloadOpMOVBQZXloadOpMOVBQSXloadOpMOVQloadOpMOVQstoreOpMOVQloadidx8OpMOVQstoreidx8OpMOVQloadglobalOpMOVQstoreglobalOpMOVQconstOpREPMOVSB"
+	_Op_name_1 = "opGenericBaseOpAddOpSubOpMulOpLshOpRshOpLessOpConstOpArgOpGlobalOpFuncOpFPOpSPOpCopyOpMoveOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpLoadOpStoreOpArrayIndexOpPtrIndexOpIsNonNilOpIsInBoundsOpCallOpStaticCallOpConvertOpConvNopOpOffPtrOpStoreReg8OpLoadReg8OpFwdRefOpGenericEnd"
+	_Op_name_2 = "opAMD64BaseOpADDQOpADDQconstOpSUBQOpSUBQconstOpMULQOpMULQconstOpSHLQOpSHLQconstOpNEGQOpADDLOpCMPQOpCMPQconstOpTESTQOpTESTBOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpLEAQglobalOpMOVBloadOpMOVBQZXloadOpMOVBQSXloadOpMOVQloadOpMOVQstoreOpMOVQloadidx8OpMOVQstoreidx8OpMOVQloadglobalOpMOVQstoreglobalOpMOVQconstOpREPMOVSB"
 	_Op_name_3 = "op386Base"
 	_Op_name_4 = "opMax"
 )
 
 var (
 	_Op_index_0 = [...]uint8{0, 9}
-	_Op_index_1 = [...]uint16{0, 13, 18, 23, 28, 34, 41, 46, 54, 60, 64, 68, 74, 80, 85, 96, 106, 116, 126, 138, 149, 160, 166, 173, 185, 195, 205, 217, 223, 235, 244, 253, 261, 269, 277, 288, 298, 306}
-	_Op_index_2 = [...]uint16{0, 11, 17, 23, 30, 37, 43, 50, 56, 63, 69, 75, 81, 88, 95, 102, 109, 116, 122, 129, 135, 148, 154, 161, 168, 175, 187, 197, 210, 223, 233, 244, 258, 273, 289, 306, 317, 327}
+	_Op_index_1 = [...]uint16{0, 13, 18, 23, 28, 33, 38, 44, 51, 56, 64, 70, 74, 78, 84, 90, 95, 106, 116, 126, 136, 148, 159, 170, 176, 183, 195, 205, 215, 227, 233, 245, 254, 263, 271, 282, 292, 300, 312}
+	_Op_index_2 = [...]uint16{0, 11, 17, 28, 34, 45, 51, 62, 68, 79, 85, 91, 97, 108, 115, 122, 129, 136, 142, 149, 155, 168, 174, 181, 188, 195, 207, 217, 230, 243, 253, 264, 278, 293, 309, 326, 337, 347}
 	_Op_index_3 = [...]uint8{0, 9}
 	_Op_index_4 = [...]uint8{0, 5}
 )
@@ -24,7 +24,7 @@ func (i Op) String() string {
 	switch {
 	case i == 0:
 		return _Op_name_0
-	case 1001 <= i && i <= 1037:
+	case 1001 <= i && i <= 1038:
 		i -= 1001
 		return _Op_name_1[_Op_index_1[i]:_Op_index_1[i+1]]
 	case 2001 <= i && i <= 2037:
......
@@ -14,13 +14,13 @@ const (
 	// arithmetic
 	OpADDQ // arg0 + arg1
+	OpADDQconst // arg + aux.(int64)
 	OpSUBQ // arg0 - arg1
-	OpADDCQ // arg + aux.(int64)
-	OpSUBCQ // arg - aux.(int64)
+	OpSUBQconst // arg - aux.(int64)
 	OpMULQ // arg0 * arg1
-	OpMULCQ // arg * aux.(int64)
+	OpMULQconst // arg * aux.(int64)
 	OpSHLQ // arg0 << arg1
-	OpSHLCQ // arg << aux.(int64)
+	OpSHLQconst // arg << aux.(int64)
 	OpNEGQ // -arg
 	OpADDL // arg0 + arg1
@@ -28,7 +28,7 @@ const (
 	// We pretend the flags type is an opaque thing that comparisons generate
 	// and from which we can extract boolean conditions like <, ==, etc.
 	OpCMPQ // arg0 compare to arg1
-	OpCMPCQ // arg0 compare to aux.(int64)
+	OpCMPQconst // arg0 compare to aux.(int64)
 	OpTESTQ // (arg0 & arg1) compare to 0
 	OpTESTB // (arg0 & arg1) compare to 0
@@ -96,7 +96,8 @@ var regsAMD64 = [...]string{
 	"OVERWRITE0", // the same register as the first input
 }
 
-var gp regMask = 0x1ffff // all integer registers (including SP&FP)
+var gp regMask = 0x1ffff // all integer registers including SP&FP
+var gpout regMask = 0xffef // integer registers not including SP&FP
 var cx regMask = 1 << 1
 var si regMask = 1 << 6
 var di regMask = 1 << 7
@@ -104,35 +105,35 @@ var flags regMask = 1 << 17
 var (
 	// gp = general purpose (integer) registers
-	gp21       = [2][]regMask{{gp, gp}, {gp}}    // 2 input, 1 output
-	gp11       = [2][]regMask{{gp}, {gp}}        // 1 input, 1 output
-	gp01       = [2][]regMask{{}, {gp}}          // 0 input, 1 output
-	shift      = [2][]regMask{{gp, cx}, {gp}}    // shift operations
+	gp21       = [2][]regMask{{gp, gp}, {gpout}} // 2 input, 1 output
+	gp11       = [2][]regMask{{gp}, {gpout}}     // 1 input, 1 output
+	gp01       = [2][]regMask{{}, {gpout}}       // 0 input, 1 output
+	shift      = [2][]regMask{{gp, cx}, {gpout}} // shift operations
 	gp2_flags  = [2][]regMask{{gp, gp}, {flags}} // generate flags from 2 gp regs
 	gp1_flags  = [2][]regMask{{gp}, {flags}}     // generate flags from 1 gp reg
-	gpload     = [2][]regMask{{gp, 0}, {gp}}
-	gploadidx  = [2][]regMask{{gp, gp, 0}, {gp}}
+	gpload     = [2][]regMask{{gp, 0}, {gpout}}
+	gploadidx  = [2][]regMask{{gp, gp, 0}, {gpout}}
 	gpstore    = [2][]regMask{{gp, gp, 0}, {0}}
 	gpstoreidx = [2][]regMask{{gp, gp, gp, 0}, {0}}
-	gpload_stack  = [2][]regMask{{0}, {gp}}
+	gpload_stack  = [2][]regMask{{0}, {gpout}}
 	gpstore_stack = [2][]regMask{{gp, 0}, {0}}
 )
 // Opcodes that appear in an output amd64 program
 var amd64Table = map[Op]opInfo{
 	OpADDQ: {flags: OpFlagCommutative, asm: "ADDQ\t%I0,%I1,%O0", reg: gp21}, // TODO: overwrite
-	OpADDCQ: {asm: "ADDQ\t$%A,%I0,%O0", reg: gp11}, // aux = int64 constant to add
+	OpADDQconst: {asm: "ADDQ\t$%A,%I0,%O0", reg: gp11}, // aux = int64 constant to add
 	OpSUBQ: {asm: "SUBQ\t%I0,%I1,%O0", reg: gp21},
-	OpSUBCQ: {asm: "SUBQ\t$%A,%I0,%O0", reg: gp11},
+	OpSUBQconst: {asm: "SUBQ\t$%A,%I0,%O0", reg: gp11},
 	OpMULQ: {asm: "MULQ\t%I0,%I1,%O0", reg: gp21},
-	OpMULCQ: {asm: "MULQ\t$%A,%I0,%O0", reg: gp11},
+	OpMULQconst: {asm: "IMULQ\t$%A,%I0,%O0", reg: gp11},
 	OpSHLQ: {asm: "SHLQ\t%I0,%I1,%O0", reg: gp21},
-	OpSHLCQ: {asm: "SHLQ\t$%A,%I0,%O0", reg: gp11},
+	OpSHLQconst: {asm: "SHLQ\t$%A,%I0,%O0", reg: gp11},
 	OpCMPQ: {asm: "CMPQ\t%I0,%I1", reg: gp2_flags}, // compute arg[0]-arg[1] and produce flags
-	OpCMPCQ: {asm: "CMPQ\t$%A,%I0", reg: gp1_flags},
+	OpCMPQconst: {asm: "CMPQ\t$%A,%I0", reg: gp1_flags},
 	OpTESTQ: {asm: "TESTQ\t%I0,%I1", reg: gp2_flags},
 	OpTESTB: {asm: "TESTB\t%I0,%I1", reg: gp2_flags},
......
@@ -68,8 +68,9 @@ func typeSize(t Type) int64 {
 // addOff adds two offset aux values. Each should be an int64. Fails if wraparound happens.
 func addOff(a, b interface{}) interface{} {
-	x := a.(int64)
-	y := b.(int64)
+	return addOffset(a.(int64), b.(int64))
+}
+
+func addOffset(x, y int64) int64 {
 	z := x + y
 	// x and y have same sign and z has a different sign => overflow
 	if x^y >= 0 && x^z < 0 {
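The sign test in addOffset is the standard two's-complement overflow check: signed addition can only wrap when both operands have the same sign, and it wrapped exactly when the sum's sign differs from theirs. A standalone demonstration (editor's example, same predicate as above):

```go
package main

import "fmt"

// addOverflows reports whether x+y wraps around in int64 arithmetic.
func addOverflows(x, y int64) bool {
	z := x + y
	return x^y >= 0 && x^z < 0 // same-sign inputs, different-sign sum
}

func main() {
	fmt.Println(addOverflows(1<<62, 1<<62)) // true: positive sum wraps negative
	fmt.Println(addOverflows(-1, 1))        // false: opposite signs never overflow
}
```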
......
@@ -27,7 +27,7 @@
 (Sub <t> x y) && is64BitInt(t) -> (SUBQ x y)
 (Mul <t> x y) && is64BitInt(t) -> (MULQ x y)
+(Lsh <t> x y) && is64BitInt(t) -> (SHLQ x y) // TODO: check y>63
 
 (Less x y) && is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type) -> (SETL (CMPQ <TypeFlags> x y))
 
 (Load <t> ptr mem) && t.IsBoolean() -> (MOVBload [int64(0)] ptr mem)
@@ -40,7 +40,7 @@
 (Move [size] dst src mem) -> (REPMOVSB dst src (Const <TypeUInt64> [size.(int64)]) mem)
 
-(OffPtr [off] ptr) -> (ADDCQ [off] ptr)
+(OffPtr [off] ptr) -> (ADDQconst [off] ptr)
 
 (Const <t> [val]) && is64BitInt(t) -> (MOVQconst [val])
@@ -51,39 +51,41 @@
 (Global [sym]) -> (LEAQglobal [GlobalOffset{sym,0}])
 
 // fold constants into instructions
-(ADDQ x (MOVQconst [c])) -> (ADDCQ [c] x) // TODO: restrict c to int32 range?
-(ADDQ (MOVQconst [c]) x) -> (ADDCQ [c] x)
-(SUBQ x (MOVQconst [c])) -> (SUBCQ x [c])
-(SUBQ <t> (MOVQconst [c]) x) -> (NEGQ (SUBCQ <t> x [c]))
-(MULQ x (MOVQconst [c])) -> (MULCQ [c] x)
-(MULQ (MOVQconst [c]) x) -> (MULCQ [c] x)
-(CMPQ x (MOVQconst [c])) -> (CMPCQ x [c])
-(CMPQ (MOVQconst [c]) x) -> (InvertFlags (CMPCQ <TypeFlags> x [c]))
+(ADDQ x (MOVQconst [c])) -> (ADDQconst [c] x) // TODO: restrict c to int32 range?
+(ADDQ (MOVQconst [c]) x) -> (ADDQconst [c] x)
+(SUBQ x (MOVQconst [c])) -> (SUBQconst x [c])
+(SUBQ <t> (MOVQconst [c]) x) -> (NEGQ (SUBQconst <t> x [c]))
+(MULQ x (MOVQconst [c])) && c.(int64) == int64(int32(c.(int64))) -> (MULQconst [c] x)
+(MULQ (MOVQconst [c]) x) -> (MULQconst [c] x)
+(SHLQ x (MOVQconst [c])) -> (SHLQconst [c] x)
+(CMPQ x (MOVQconst [c])) -> (CMPQconst x [c])
+(CMPQ (MOVQconst [c]) x) -> (InvertFlags (CMPQconst <TypeFlags> x [c]))
 
 // strength reduction
 // TODO: do this a lot more generically
-(MULCQ [c] x) && c.(int64) == 8 -> (SHLCQ [int64(3)] x)
+(MULQconst [c] x) && c.(int64) == 8 -> (SHLQconst [int64(3)] x)
+(MULQconst [c] x) && c.(int64) == 64 -> (SHLQconst [int64(6)] x)
 
 // fold add/shift into leaq
-(ADDQ x (SHLCQ [shift] y)) && shift.(int64) == 3 -> (LEAQ8 [int64(0)] x y)
-(ADDCQ [c] (LEAQ8 [d] x y)) -> (LEAQ8 [addOff(c, d)] x y)
+(ADDQ x (SHLQconst [shift] y)) && shift.(int64) == 3 -> (LEAQ8 [int64(0)] x y)
+(ADDQconst [c] (LEAQ8 [d] x y)) -> (LEAQ8 [addOff(c, d)] x y)
 
 // reverse ordering of compare instruction
 (SETL (InvertFlags x)) -> (SETGE x)
 
 // fold constants into memory operations
 // Note that this is not always a good idea because if not all the uses of
-// the ADDCQ get eliminated, we still have to compute the ADDCQ and we now
-// have potentially two live values (ptr and (ADDCQ [off] ptr)) instead of one.
+// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
+// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
 // Nevertheless, let's do it!
-(MOVQload [off1] (ADDCQ [off2] ptr) mem) -> (MOVQload [addOff(off1, off2)] ptr mem)
-(MOVQstore [off1] (ADDCQ [off2] ptr) val mem) -> (MOVQstore [addOff(off1, off2)] ptr val mem)
+(MOVQload [off1] (ADDQconst [off2] ptr) mem) -> (MOVQload [addOff(off1, off2)] ptr mem)
+(MOVQstore [off1] (ADDQconst [off2] ptr) val mem) -> (MOVQstore [addOff(off1, off2)] ptr val mem)
 
 // indexed loads and stores
 (MOVQload [off1] (LEAQ8 [off2] ptr idx) mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
 (MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
-(MOVQloadidx8 [off1] (ADDCQ [off2] ptr) idx mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
-(MOVQstoreidx8 [off1] (ADDCQ [off2] ptr) idx val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
+(MOVQloadidx8 [off1] (ADDQconst [off2] ptr) idx mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+(MOVQstoreidx8 [off1] (ADDQconst [off2] ptr) idx val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
 
-(ADDCQ [off] x) && off.(int64) == 0 -> (Copy x)
+(ADDQconst [off] x) && off.(int64) == 0 -> (Copy x)
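The strength-reduction rules above rely on the identity x * 2^k == x << k, so the multiplier 8 = 1<<3 becomes a shift by 3 and 64 = 1<<6 a shift by 6. A one-line sanity check (editor's example):

```go
package main

import "fmt"

func main() {
	x := int64(7)
	fmt.Println(x*8 == x<<3, x*64 == x<<6) // true true
}
```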
......
 package ssa
 
+import "log"
+
 // stackalloc allocates storage in the stack frame for
 // all Values that did not get a register.
 func stackalloc(f *Func) {
 	home := f.RegAlloc
-	var n int64 = 8 // 8 = space for return address. TODO: arch-dependent
+
+	// First compute the size of the outargs section.
+	n := int64(16) //TODO: compute max of all callsites
+
+	// Include one slot for deferreturn.
+	if false && n < f.Config.ptrSize { //TODO: check for deferreturn
+		n = f.Config.ptrSize
+	}
+
+	// TODO: group variables by ptr/nonptr, size, etc. Emit ptr vars last
+	// so stackmap is smaller.
 
 	// Assign stack locations to phis first, because we
 	// must also assign the same locations to the phi copies
@@ -52,10 +63,49 @@ func stackalloc(f *Func) {
 			home = setloc(home, v, loc)
 		}
 	}
 
+	// TODO: align n
+	n += f.Config.ptrSize // space for return address. TODO: arch-dependent
+
 	f.RegAlloc = home
+	f.FrameSize = n
+
 	// TODO: share stack slots among noninterfering (& gc type compatible) values
-	// TODO: align final n
-	// TODO: compute total frame size: n + max paramout space
-	// TODO: save total size somewhere
+
+	// adjust all uses of FP to SP now that we have the frame size.
+	var fp *Value
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op == OpFP {
+				if fp != nil {
+					log.Panicf("multiple FP ops: %s %s", fp, v)
+				}
+				fp = v
+			}
+			for i, a := range v.Args {
+				if a.Op != OpFP {
+					continue
+				}
+				// TODO: do this with arch-specific rewrite rules somehow?
+				switch v.Op {
+				case OpADDQ:
+					// (ADDQ (FP) x) -> (LEAQ [n] (SP) x)
+					v.Op = OpLEAQ
+					v.Aux = n
+				case OpLEAQ, OpMOVQload, OpMOVQstore, OpMOVBload, OpMOVQloadidx8:
+					if v.Op == OpMOVQloadidx8 && i == 1 {
+						// Note: we could do it, but it is probably an error
+						log.Panicf("can't do FP->SP adjust on index slot of load %s", v.Op)
+					}
+					// eg: (MOVQload [c] (FP) mem) -> (MOVQload [c+n] (SP) mem)
+					v.Aux = addOffset(v.Aux.(int64), n)
+				default:
+					log.Panicf("can't do FP->SP adjust on %s", v.Op)
+				}
+			}
+		}
+	}
+	if fp != nil {
+		fp.Op = OpSP
+		home[fp.ID] = &registers[4] // TODO: arch-dependent
+	}
 }