Commit cddddbc6 authored by David Chase's avatar David Chase

cmd/compile: use ISEL, cleanup use of zero & extensions

Abandoned earlier efforts to expose zero register,
but left it in numbering to decrease squirrelyness of
register allocator.

ISELrelOp used in code generation of bool := x relOp y.
Some patterns added to better elide zero case and
some sign extension.

Updates: #17109

Change-Id: Ida7839f0023ca8f0ffddc0545f0ac269e65b05d9
Reviewed-on: https://go-review.googlesource.com/29380
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent dcbbd319
...@@ -336,6 +336,9 @@ const ( ...@@ -336,6 +336,9 @@ const (
// Instruction updates whichever of from/to is type D_OREG. (ppc64) // Instruction updates whichever of from/to is type D_OREG. (ppc64)
PostInc = 1 << 29 PostInc = 1 << 29
// Optional 3rd input operand, only ever read.
From3Read = 1 << 30
) )
type Arch struct { type Arch struct {
......
...@@ -624,6 +624,20 @@ func progeffects(prog *obj.Prog, vars []*Node, uevar bvec, varkill bvec, avarini ...@@ -624,6 +624,20 @@ func progeffects(prog *obj.Prog, vars []*Node, uevar bvec, varkill bvec, avarini
} }
} }
if info.Flags&From3Read != 0 {
from := prog.From3
if from.Node != nil && from.Sym != nil {
n := from.Node.(*Node)
if pos := liveIndex(n, vars); pos >= 0 {
if n.Addrtaken {
bvset(avarinit, pos)
} else {
bvset(uevar, pos)
}
}
}
}
if info.Flags&(RightRead|RightWrite|RightAddr) != 0 { if info.Flags&(RightRead|RightWrite|RightAddr) != 0 {
to := &prog.To to := &prog.To
if to.Node != nil && to.Sym != nil { if to.Node != nil && to.Sym != nil {
......
...@@ -100,6 +100,8 @@ var progtable = [ppc64.ALAST & obj.AMask]gc.ProgInfo{ ...@@ -100,6 +100,8 @@ var progtable = [ppc64.ALAST & obj.AMask]gc.ProgInfo{
ppc64.AMOVHZ & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv}, ppc64.AMOVHZ & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
ppc64.AMOVW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv}, ppc64.AMOVW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
ppc64.AISEL & obj.AMask: {Flags: gc.SizeQ | gc.RegRead | gc.From3Read | gc.RightWrite},
// there is no AMOVWU. // there is no AMOVWU.
ppc64.AMOVWZU & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv | gc.PostInc}, ppc64.AMOVWZU & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv | gc.PostInc},
ppc64.AMOVWZ & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv}, ppc64.AMOVWZ & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
......
...@@ -26,6 +26,28 @@ var condOps = map[ssa.Op]obj.As{ ...@@ -26,6 +26,28 @@ var condOps = map[ssa.Op]obj.As{
ssa.OpPPC64FGreaterEqual: ppc64.ABGT, // 2 branches for FCMP >=, second is BEQ ssa.OpPPC64FGreaterEqual: ppc64.ABGT, // 2 branches for FCMP >=, second is BEQ
} }
// iselOp encodes mapping of comparison operations onto ISEL operands
type iselOp struct {
cond int64
valueIfCond int // if cond is true, the value to return (0 or 1)
}
// Input registers to ISEL used for comparison. Index 0 is zero, 1 is (will be) 1
var iselRegs = [2]int16{ppc64.REG_R0, ppc64.REGTMP}
var iselOps = map[ssa.Op]iselOp{
ssa.OpPPC64Equal: iselOp{cond: ppc64.C_COND_EQ, valueIfCond: 1},
ssa.OpPPC64NotEqual: iselOp{cond: ppc64.C_COND_EQ, valueIfCond: 0},
ssa.OpPPC64LessThan: iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1},
ssa.OpPPC64GreaterEqual: iselOp{cond: ppc64.C_COND_LT, valueIfCond: 0},
ssa.OpPPC64GreaterThan: iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1},
ssa.OpPPC64LessEqual: iselOp{cond: ppc64.C_COND_GT, valueIfCond: 0},
ssa.OpPPC64FLessThan: iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1},
ssa.OpPPC64FGreaterThan: iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1},
ssa.OpPPC64FLessEqual: iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
ssa.OpPPC64FGreaterEqual: iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
}
// markMoves marks any MOVXconst ops that need to avoid clobbering flags. // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) { func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
// flive := b.FlagsLiveAtEnd // flive := b.FlagsLiveAtEnd
...@@ -34,7 +56,7 @@ func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) { ...@@ -34,7 +56,7 @@ func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
// } // }
// for i := len(b.Values) - 1; i >= 0; i-- { // for i := len(b.Values) - 1; i >= 0; i-- {
// v := b.Values[i] // v := b.Values[i]
// if flive && (v.Op == ssa.OpPPC64MOVWconst || v.Op == ssa.OpPPC64MOVDconst) { // if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
// // The "mark" is any non-nil Aux value. // // The "mark" is any non-nil Aux value.
// v.Aux = v // v.Aux = v
// } // }
...@@ -120,6 +142,17 @@ func scratchFpMem(s *gc.SSAGenState, a *obj.Addr) { ...@@ -120,6 +142,17 @@ func scratchFpMem(s *gc.SSAGenState, a *obj.Addr) {
a.Reg = ppc64.REGSP a.Reg = ppc64.REGSP
} }
func ssaGenISEL(v *ssa.Value, cr int64, r1, r2 int16) {
r := v.Reg()
p := gc.Prog(ppc64.AISEL)
p.To.Type = obj.TYPE_REG
p.To.Reg = r
p.Reg = r1
p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r2}
p.From.Type = obj.TYPE_CONST
p.From.Offset = cr
}
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
s.SetLineno(v.Line) s.SetLineno(v.Line)
switch v.Op { switch v.Op {
...@@ -382,7 +415,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -382,7 +415,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg) v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
} }
case ssa.OpPPC64MOVDconst, ssa.OpPPC64MOVWconst: case ssa.OpPPC64MOVDconst:
p := gc.Prog(v.Op.Asm()) p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt p.From.Offset = v.AuxInt
...@@ -418,7 +451,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -418,7 +451,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload, ssa.OpPPC64MOVBload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload: case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload:
p := gc.Prog(v.Op.Asm()) p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg() p.From.Reg = v.Args[0].Reg()
...@@ -465,25 +498,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -465,25 +498,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpPPC64GreaterThan, ssa.OpPPC64GreaterThan,
ssa.OpPPC64FGreaterThan, ssa.OpPPC64FGreaterThan,
ssa.OpPPC64GreaterEqual: ssa.OpPPC64GreaterEqual:
// On Power7 or later, can use isel instruction: // On Power7 or later, can use isel instruction:
// for a < b, a > b, a = b: // for a < b, a > b, a = b:
// rt := 1 // rtmp := 1
// isel rt,rt,r0,cond // isel rt,rtmp,r0,cond // rt is target in ppc asm
// for a >= b, a <= b, a != b: // for a >= b, a <= b, a != b:
// rt := 1 // rtmp := 1
// isel rt,0,rt,!cond // isel rt,0,rtmp,!cond // rt is target in ppc asm
// However, PPCbe support is for older machines than that,
// and isel (which looks a lot like fsel) isn't recognized
// yet by the Go assembler. So for now, use the old instruction
// sequence, which we'll need anyway.
// TODO: add support for isel on PPCle and use it.
// generate boolean values
// use conditional move
p := gc.Prog(ppc64.AMOVW) if v.Block.Func.Config.OldArch {
p := gc.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
p.From.Offset = 1 p.From.Offset = 1
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
...@@ -492,7 +518,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -492,7 +518,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
pb := gc.Prog(condOps[v.Op]) pb := gc.Prog(condOps[v.Op])
pb.To.Type = obj.TYPE_BRANCH pb.To.Type = obj.TYPE_BRANCH
p = gc.Prog(ppc64.AMOVW) p = gc.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
p.From.Offset = 0 p.From.Offset = 0
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
...@@ -500,10 +526,21 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -500,10 +526,21 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p = gc.Prog(obj.ANOP) p = gc.Prog(obj.ANOP)
gc.Patch(pb, p) gc.Patch(pb, p)
break
}
// Modern PPC uses ISEL
p := gc.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 1
p.To.Type = obj.TYPE_REG
p.To.Reg = iselRegs[1]
iop := iselOps[v.Op]
ssaGenISEL(v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion
ssa.OpPPC64FGreaterEqual: ssa.OpPPC64FGreaterEqual:
if v.Block.Func.Config.OldArch {
p := gc.Prog(ppc64.AMOVW) p := gc.Prog(ppc64.AMOVW)
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
p.From.Offset = 1 p.From.Offset = 1
...@@ -524,6 +561,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -524,6 +561,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p = gc.Prog(obj.ANOP) p = gc.Prog(obj.ANOP)
gc.Patch(pb0, p) gc.Patch(pb0, p)
gc.Patch(pb1, p) gc.Patch(pb1, p)
break
}
// Modern PPC uses ISEL
p := gc.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 1
p.To.Type = obj.TYPE_REG
p.To.Reg = iselRegs[1]
iop := iselOps[v.Op]
ssaGenISEL(v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
ssaGenISEL(v, ppc64.C_COND_EQ, iselRegs[1], v.Reg())
case ssa.OpPPC64LoweredZero: case ssa.OpPPC64LoweredZero:
// Similar to how this is done on ARM, // Similar to how this is done on ARM,
......
...@@ -32,8 +32,9 @@ type Config struct { ...@@ -32,8 +32,9 @@ type Config struct {
noDuffDevice bool // Don't use Duff's device noDuffDevice bool // Don't use Duff's device
nacl bool // GOOS=nacl nacl bool // GOOS=nacl
use387 bool // GO386=387 use387 bool // GO386=387
OldArch bool // True for older versions of architecture, e.g. true for PPC64BE, false for PPC64LE
NeedsFpScratch bool // No direct move between GP and FP register sets NeedsFpScratch bool // No direct move between GP and FP register sets
DebugTest bool // as a debugging aid for binary search using GOSSAHASH, make buggy new code conditional on this DebugTest bool // default true unless $GOSSAHASH != ""; as a debugging aid, make new code conditional on this and use GOSSAHASH to binary search for failing cases
sparsePhiCutoff uint64 // Sparse phi location algorithm used above this #blocks*#variables score sparsePhiCutoff uint64 // Sparse phi location algorithm used above this #blocks*#variables score
curFunc *Func curFunc *Func
...@@ -180,7 +181,10 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config ...@@ -180,7 +181,10 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.FPReg = framepointerRegARM64 c.FPReg = framepointerRegARM64
c.hasGReg = true c.hasGReg = true
c.noDuffDevice = obj.GOOS == "darwin" // darwin linker cannot handle BR26 reloc with non-zero addend c.noDuffDevice = obj.GOOS == "darwin" // darwin linker cannot handle BR26 reloc with non-zero addend
case "ppc64le", "ppc64": case "ppc64":
c.OldArch = true
fallthrough
case "ppc64le":
c.IntSize = 8 c.IntSize = 8
c.PtrSize = 8 c.PtrSize = 8
c.lowerBlock = rewriteBlockPPC64 c.lowerBlock = rewriteBlockPPC64
......
...@@ -149,14 +149,14 @@ ...@@ -149,14 +149,14 @@
// (MaskIfNotCarry CarrySet) -> -1 // (MaskIfNotCarry CarrySet) -> -1
// Lowering constants // Lowering constants
(Const8 [val]) -> (MOVWconst [val]) (Const8 [val]) -> (MOVDconst [val])
(Const16 [val]) -> (MOVWconst [val]) (Const16 [val]) -> (MOVDconst [val])
(Const32 [val]) -> (MOVWconst [val]) (Const32 [val]) -> (MOVDconst [val])
(Const64 [val]) -> (MOVDconst [val]) (Const64 [val]) -> (MOVDconst [val])
(Const32F [val]) -> (FMOVSconst [val]) (Const32F [val]) -> (FMOVSconst [val])
(Const64F [val]) -> (FMOVDconst [val]) (Const64F [val]) -> (FMOVDconst [val])
(ConstNil) -> (MOVDconst [0]) (ConstNil) -> (MOVDconst [0])
(ConstBool [b]) -> (MOVWconst [b]) (ConstBool [b]) -> (MOVDconst [b])
(Addr {sym} base) -> (MOVDaddr {sym} base) (Addr {sym} base) -> (MOVDaddr {sym} base)
// (Addr {sym} base) -> (ADDconst {sym} base) // (Addr {sym} base) -> (ADDconst {sym} base)
...@@ -326,23 +326,18 @@ ...@@ -326,23 +326,18 @@
(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no) (EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) -> (NE cmp yes no) (NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
// (FLT (InvertFlags cmp) yes no) -> (FGT cmp yes no)
// (FGT (InvertFlags cmp) yes no) -> (FLT cmp yes no)
// (FLE (InvertFlags cmp) yes no) -> (FGE cmp yes no)
// (FGE (InvertFlags cmp) yes no) -> (FLE cmp yes no)
// constant comparisons // constant comparisons
(CMPWconst (MOVWconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ) (CMPWconst (MOVDconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
(CMPWconst (MOVWconst [x]) [y]) && int32(x)<int32(y) -> (FlagLT) (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) -> (FlagLT)
(CMPWconst (MOVWconst [x]) [y]) && int32(x)>int32(y) -> (FlagGT) (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) -> (FlagGT)
(CMPconst (MOVDconst [x]) [y]) && int64(x)==int64(y) -> (FlagEQ) (CMPconst (MOVDconst [x]) [y]) && int64(x)==int64(y) -> (FlagEQ)
(CMPconst (MOVDconst [x]) [y]) && int64(x)<int64(y) -> (FlagLT) (CMPconst (MOVDconst [x]) [y]) && int64(x)<int64(y) -> (FlagLT)
(CMPconst (MOVDconst [x]) [y]) && int64(x)>int64(y) -> (FlagGT) (CMPconst (MOVDconst [x]) [y]) && int64(x)>int64(y) -> (FlagGT)
(CMPWUconst (MOVWconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ) (CMPWUconst (MOVDconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
(CMPWUconst (MOVWconst [x]) [y]) && uint32(x)<uint32(y) -> (FlagLT) (CMPWUconst (MOVDconst [x]) [y]) && uint32(x)<uint32(y) -> (FlagLT)
(CMPWUconst (MOVWconst [x]) [y]) && uint32(x)>uint32(y) -> (FlagGT) (CMPWUconst (MOVDconst [x]) [y]) && uint32(x)>uint32(y) -> (FlagGT)
(CMPUconst (MOVDconst [x]) [y]) && int64(x)==int64(y) -> (FlagEQ) (CMPUconst (MOVDconst [x]) [y]) && int64(x)==int64(y) -> (FlagEQ)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) -> (FlagLT) (CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) -> (FlagLT)
...@@ -355,29 +350,29 @@ ...@@ -355,29 +350,29 @@
//(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint32(32-c)) <= uint32(n) -> (FlagLT) //(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint32(32-c)) <= uint32(n) -> (FlagLT)
// absorb flag constants into boolean values // absorb flag constants into boolean values
(Equal (FlagEQ)) -> (MOVWconst [1]) (Equal (FlagEQ)) -> (MOVDconst [1])
(Equal (FlagLT)) -> (MOVWconst [0]) (Equal (FlagLT)) -> (MOVDconst [0])
(Equal (FlagGT)) -> (MOVWconst [0]) (Equal (FlagGT)) -> (MOVDconst [0])
(NotEqual (FlagEQ)) -> (MOVWconst [0]) (NotEqual (FlagEQ)) -> (MOVDconst [0])
(NotEqual (FlagLT)) -> (MOVWconst [1]) (NotEqual (FlagLT)) -> (MOVDconst [1])
(NotEqual (FlagGT)) -> (MOVWconst [1]) (NotEqual (FlagGT)) -> (MOVDconst [1])
(LessThan (FlagEQ)) -> (MOVWconst [0]) (LessThan (FlagEQ)) -> (MOVDconst [0])
(LessThan (FlagLT)) -> (MOVWconst [1]) (LessThan (FlagLT)) -> (MOVDconst [1])
(LessThan (FlagGT)) -> (MOVWconst [0]) (LessThan (FlagGT)) -> (MOVDconst [0])
(LessEqual (FlagEQ)) -> (MOVWconst [1]) (LessEqual (FlagEQ)) -> (MOVDconst [1])
(LessEqual (FlagLT)) -> (MOVWconst [1]) (LessEqual (FlagLT)) -> (MOVDconst [1])
(LessEqual (FlagGT)) -> (MOVWconst [0]) (LessEqual (FlagGT)) -> (MOVDconst [0])
(GreaterThan (FlagEQ)) -> (MOVWconst [0]) (GreaterThan (FlagEQ)) -> (MOVDconst [0])
(GreaterThan (FlagLT)) -> (MOVWconst [0]) (GreaterThan (FlagLT)) -> (MOVDconst [0])
(GreaterThan (FlagGT)) -> (MOVWconst [1]) (GreaterThan (FlagGT)) -> (MOVDconst [1])
(GreaterEqual (FlagEQ)) -> (MOVWconst [1]) (GreaterEqual (FlagEQ)) -> (MOVDconst [1])
(GreaterEqual (FlagLT)) -> (MOVWconst [0]) (GreaterEqual (FlagLT)) -> (MOVDconst [0])
(GreaterEqual (FlagGT)) -> (MOVWconst [1]) (GreaterEqual (FlagGT)) -> (MOVDconst [1])
// absorb InvertFlags into boolean values // absorb InvertFlags into boolean values
(Equal (InvertFlags x)) -> (Equal x) (Equal (InvertFlags x)) -> (Equal x)
...@@ -387,19 +382,14 @@ ...@@ -387,19 +382,14 @@
(LessEqual (InvertFlags x)) -> (GreaterEqual x) (LessEqual (InvertFlags x)) -> (GreaterEqual x)
(GreaterEqual (InvertFlags x)) -> (LessEqual x) (GreaterEqual (InvertFlags x)) -> (LessEqual x)
// (FLessThan (InvertFlags x)) -> (FGreaterThan x)
// (FGreaterThan (InvertFlags x)) -> (FLessThan x)
// (FLessEqual (InvertFlags x)) -> (FGreaterEqual x)
// (FGreaterEqual (InvertFlags x)) -> (FLessEqual x)
// Lowering loads // Lowering loads
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem) (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem) (Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem) (Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem) (Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem) (Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && isSigned(t))) -> (MOVBload ptr mem) (Load <t> ptr mem) && t.IsBoolean() -> (MOVBZload ptr mem)
(Load <t> ptr mem) && is8BitInt(t) && isSigned(t) -> (MOVBreg (MOVBZload ptr mem)) // PPC has no signed-byte load.
(Load <t> ptr mem) && is8BitInt(t) && !isSigned(t) -> (MOVBZload ptr mem) (Load <t> ptr mem) && is8BitInt(t) && !isSigned(t) -> (MOVBZload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
...@@ -533,6 +523,15 @@ ...@@ -533,6 +523,15 @@
(ADD (MOVDconst [c]) x) && is32Bit(c) -> (ADDconst [c] x) (ADD (MOVDconst [c]) x) && is32Bit(c) -> (ADDconst [c] x)
(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x) (ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x) (ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
(ADDconst [0] x) -> x
(ANDconst [-1] x) -> x
(ANDconst [0] _) -> (MOVDconst [0])
(XORconst [0] x) -> x
(XOR (MOVDconst [0]) x) -> x
(XOR x (MOVDconst [0])) -> x
(ADD (MOVDconst [0]) x) -> x
(ADD x (MOVDconst [0])) -> x
// Fold offsets for stores. // Fold offsets for stores.
(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} x val mem) (MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} x val mem)
...@@ -557,8 +556,6 @@ ...@@ -557,8 +556,6 @@
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) -> (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) -> (MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
(MOVBZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) -> (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
...@@ -585,7 +582,6 @@ ...@@ -585,7 +582,6 @@
(MOVWZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} x mem) (MOVWZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} x mem)
(MOVHload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVHload [off1+off2] {sym} x mem) (MOVHload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVHload [off1+off2] {sym} x mem)
(MOVHZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} x mem) (MOVHZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} x mem)
(MOVBload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVBload [off1+off2] {sym} x mem)
(MOVBZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVBZload [off1+off2] {sym} x mem) (MOVBZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVBZload [off1+off2] {sym} x mem)
// Store of zero -> storezero // Store of zero -> storezero
...@@ -630,6 +626,16 @@ ...@@ -630,6 +626,16 @@
// Note that MOV??reg returns a 64-bit int, x is not necessarily that wide // Note that MOV??reg returns a 64-bit int, x is not necessarily that wide
// This may interact with other patterns in the future. (Compare with arm64) // This may interact with other patterns in the future. (Compare with arm64)
(MOVBZreg x:(MOVBZload _ _)) -> x (MOVBZreg x:(MOVBZload _ _)) -> x
(MOVBreg x:(MOVBload _ _)) -> x
(MOVHZreg x:(MOVHZload _ _)) -> x (MOVHZreg x:(MOVHZload _ _)) -> x
(MOVHreg x:(MOVHload _ _)) -> x (MOVHreg x:(MOVHload _ _)) -> x
(MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
(MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
(MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
(MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBZreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHZreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
...@@ -17,7 +17,7 @@ import "strings" ...@@ -17,7 +17,7 @@ import "strings"
// register (R31). // register (R31).
var regNamesPPC64 = []string{ var regNamesPPC64 = []string{
// "R0", // REGZERO "R0", // REGZERO, not used, but simplifies counting in regalloc
"SP", // REGSP "SP", // REGSP
"SB", // REGSB "SB", // REGSB
"R3", "R3",
...@@ -233,7 +233,6 @@ func init() { ...@@ -233,7 +233,6 @@ func init() {
{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64 {name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"}, // sign extend int32 to int64 {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"}, // sign extend int32 to int64
{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64 {name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", typ: "Int8", faultOnNilArg0: true}, // sign extend int8 to int64
{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true}, // zero extend uint8 to uint64 {name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true}, // zero extend uint8 to uint64
{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true}, // sign extend int16 to int64 {name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true}, // sign extend int16 to int64
{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true}, // zero extend uint16 to uint64 {name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true}, // zero extend uint16 to uint64
...@@ -258,7 +257,6 @@ func init() { ...@@ -258,7 +257,6 @@ func init() {
{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB {name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", rematerializeable: true}, // {name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", rematerializeable: true}, //
{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", rematerializeable: true}, // 32 low bits of auxint
{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true}, // {name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true}, //
{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true}, // {name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true}, //
{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"}, {name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
......
This diff is collapsed.
...@@ -485,7 +485,7 @@ func (s *regAllocState) init(f *Func) { ...@@ -485,7 +485,7 @@ func (s *regAllocState) init(f *Func) {
if s.f.Config.ctxt.Flag_shared { if s.f.Config.ctxt.Flag_shared {
switch s.f.Config.arch { switch s.f.Config.arch {
case "ppc64le": // R2 already reserved. case "ppc64le": // R2 already reserved.
s.allocatable &^= 1 << 11 // R12 -- R0 is skipped in PPC64Ops.go s.allocatable &^= 1 << 12 // R12
} }
} }
if s.f.Config.ctxt.Flag_dynlink { if s.f.Config.ctxt.Flag_dynlink {
...@@ -495,7 +495,7 @@ func (s *regAllocState) init(f *Func) { ...@@ -495,7 +495,7 @@ func (s *regAllocState) init(f *Func) {
case "arm": case "arm":
s.allocatable &^= 1 << 9 // R9 s.allocatable &^= 1 << 9 // R9
case "ppc64le": // R2 already reserved. case "ppc64le": // R2 already reserved.
s.allocatable &^= 1 << 11 // R12 -- R0 is skipped in PPC64Ops.go s.allocatable &^= 1 << 12 // R12
case "arm64": case "arm64":
// nothing to do? // nothing to do?
case "386": case "386":
...@@ -813,7 +813,9 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -813,7 +813,9 @@ func (s *regAllocState) regalloc(f *Func) {
continue continue
} }
a := v.Args[idx] a := v.Args[idx]
m := s.values[a.ID].regs &^ phiUsed // Some instructions target not-allocatable registers.
// They're not suitable for further (phi-function) allocation.
m := s.values[a.ID].regs &^ phiUsed & s.allocatable
if m != 0 { if m != 0 {
r := pickReg(m) r := pickReg(m)
s.freeReg(r) s.freeReg(r)
...@@ -1942,7 +1944,7 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, line int32 ...@@ -1942,7 +1944,7 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, line int32
var x *Value var x *Value
if c == nil { if c == nil {
if !e.s.values[vid].rematerializeable { if !e.s.values[vid].rematerializeable {
e.s.f.Fatalf("can't find source for %s->%s: v%d\n", e.p, e.b, vid) e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
} }
if dstReg { if dstReg {
x = v.copyInto(e.p) x = v.copyInto(e.p)
......
...@@ -356,6 +356,15 @@ func clobber(v *Value) bool { ...@@ -356,6 +356,15 @@ func clobber(v *Value) bool {
return true return true
} }
// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
// noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
println(s)
return true
}
// logRule logs the use of the rule s. This will only be enabled if // logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see gen/rulegen.go. // rewrite rules were generated with the -log option, see gen/rulegen.go.
func logRule(s string) { func logRule(s string) {
......
...@@ -219,7 +219,7 @@ const ( ...@@ -219,7 +219,7 @@ const (
C_COND_LT = iota // 0 result is negative C_COND_LT = iota // 0 result is negative
C_COND_GT // 1 result is positive C_COND_GT // 1 result is positive
C_COND_EQ // 2 result is zero C_COND_EQ // 2 result is zero
C_COND_SO // 3 summary overflow C_COND_SO // 3 summary overflow or FP compare w/ NaN
) )
const ( const (
...@@ -300,8 +300,8 @@ const ( ...@@ -300,8 +300,8 @@ const (
ABLE // not GT = L/E/U ABLE // not GT = L/E/U
ABLT ABLT
ABNE // not EQ = L/G/U ABNE // not EQ = L/G/U
ABVC // apparently Unordered-clear ABVC // Unordered-clear
ABVS // apparently Unordered-set ABVS // Unordered-set
ACMP ACMP
ACMPU ACMPU
ACNTLZW ACNTLZW
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment