Commit 6ec4c71e authored by Michael Munday

cmd/compile: add SSA rules for s390x compare-and-branch instructions

This commit adds SSA rules for the s390x combined compare-and-branch
instructions. These have a shorter encoding than separate compare
and branch instructions and they also don't clobber the condition
code (a.k.a. flag register) reducing pressure on the flag allocator.

I have deleted the 'loop_test.go' file and replaced it with a new
codegen test which performs a wider range of checks.

Object sizes from compilebench:

name                      old object-bytes  new object-bytes  delta
Template                        562kB ± 0%        561kB ± 0%   -0.28%  (p=0.000 n=10+10)
Unicode                         217kB ± 0%        217kB ± 0%   -0.17%  (p=0.000 n=10+10)
GoTypes                        2.03MB ± 0%       2.02MB ± 0%   -0.59%  (p=0.000 n=10+10)
Compiler                       8.16MB ± 0%       8.11MB ± 0%   -0.62%  (p=0.000 n=10+10)
SSA                            27.4MB ± 0%       27.0MB ± 0%   -1.45%  (p=0.000 n=10+10)
Flate                           356kB ± 0%        356kB ± 0%   -0.12%  (p=0.000 n=10+10)
GoParser                        438kB ± 0%        436kB ± 0%   -0.51%  (p=0.000 n=10+10)
Reflect                        1.37MB ± 0%       1.37MB ± 0%   -0.42%  (p=0.000 n=10+10)
Tar                             485kB ± 0%        483kB ± 0%   -0.39%  (p=0.000 n=10+10)
XML                             630kB ± 0%        621kB ± 0%   -1.45%  (p=0.000 n=10+10)
[Geo mean]                     1.14MB            1.13MB        -0.60%

name                      old text-bytes    new text-bytes    delta
HelloSize                       763kB ± 0%        754kB ± 0%   -1.30%  (p=0.000 n=10+10)
CmdGoSize                      10.7MB ± 0%       10.6MB ± 0%   -0.91%  (p=0.000 n=10+10)
[Geo mean]                     2.86MB            2.82MB        -1.10%

Change-Id: Ibca55d9c0aa1254aee69433731ab5d26a43a7c18
Reviewed-on: https://go-review.googlesource.com/c/go/+/198037
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 77f5adba
......@@ -814,7 +814,33 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
}
}
// blockAsm maps the kind of an s390x-specific block to the assembler
// opcode of the branch instruction that implements it. Any other block
// kind is reported through b.Fatalf.
func blockAsm(b *ssa.Block) obj.As {
	var as obj.As
	switch b.Kind {
	case ssa.BlockS390XBRC:
		as = s390x.ABRC
	case ssa.BlockS390XCRJ:
		as = s390x.ACRJ
	case ssa.BlockS390XCGRJ:
		as = s390x.ACGRJ
	case ssa.BlockS390XCLRJ:
		as = s390x.ACLRJ
	case ssa.BlockS390XCLGRJ:
		as = s390x.ACLGRJ
	case ssa.BlockS390XCIJ:
		as = s390x.ACIJ
	case ssa.BlockS390XCGIJ:
		as = s390x.ACGIJ
	case ssa.BlockS390XCLIJ:
		as = s390x.ACLIJ
	case ssa.BlockS390XCLGIJ:
		as = s390x.ACLGIJ
	default:
		// No branch instruction is known for this kind of block.
		b.Fatalf("blockAsm not implemented: %s", b.LongString())
		panic("unreachable")
	}
	return as
}
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
// Handle generic blocks first.
switch b.Kind {
case ssa.BlockPlain:
if b.Succs[0].Block() != next {
......@@ -822,47 +848,73 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
p.To.Type = obj.TYPE_BRANCH
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
}
return
case ssa.BlockDefer:
// defer returns in R3:
// 0 if we should continue executing
// 1 if we should jump to deferreturn call
p := s.Prog(s390x.ACMPW)
p.From.Type = obj.TYPE_REG
p.From.Reg = s390x.REG_R3
p.To.Type = obj.TYPE_CONST
p.To.Offset = 0
p = s.Prog(s390x.ABNE)
p.To.Type = obj.TYPE_BRANCH
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
p := s.Br(s390x.ACIJ, b.Succs[1].Block())
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(s390x.NotEqual & s390x.NotUnordered) // unordered is not possible
p.Reg = s390x.REG_R3
p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: 0}}
if b.Succs[0].Block() != next {
p := s.Prog(s390x.ABR)
p.To.Type = obj.TYPE_BRANCH
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
s.Br(s390x.ABR, b.Succs[0].Block())
}
return
case ssa.BlockExit:
return
case ssa.BlockRet:
s.Prog(obj.ARET)
return
case ssa.BlockRetJmp:
p := s.Prog(s390x.ABR)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = b.Aux.(*obj.LSym)
return
}
// Handle s390x-specific blocks. These blocks all have a
// condition code mask in the Aux value and 2 successors.
succs := [...]*ssa.Block{b.Succs[0].Block(), b.Succs[1].Block()}
mask := b.Aux.(s390x.CCMask)
// TODO: take into account Likely property for forward/backward
// branches. We currently can't do this because we don't know
// whether a block has already been emitted. In general forward
// branches are assumed 'not taken' and backward branches are
// assumed 'taken'.
if next == succs[0] {
succs[0], succs[1] = succs[1], succs[0]
mask = mask.Inverse()
}
p := s.Br(blockAsm(b), succs[0])
switch b.Kind {
case ssa.BlockS390XBRC:
succs := [...]*ssa.Block{b.Succs[0].Block(), b.Succs[1].Block()}
mask := b.Aux.(s390x.CCMask)
if next == succs[0] {
succs[0], succs[1] = succs[1], succs[0]
mask = mask.Inverse()
}
// TODO: take into account Likely property for forward/backward
// branches.
p := s.Br(s390x.ABRC, succs[0])
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(mask)
if next != succs[1] {
s.Br(s390x.ABR, succs[1])
}
case ssa.BlockS390XCGRJ, ssa.BlockS390XCRJ,
ssa.BlockS390XCLGRJ, ssa.BlockS390XCLRJ:
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible
p.Reg = b.Controls[0].Reg()
p.RestArgs = []obj.Addr{{Type: obj.TYPE_REG, Reg: b.Controls[1].Reg()}}
case ssa.BlockS390XCGIJ, ssa.BlockS390XCIJ:
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible
p.Reg = b.Controls[0].Reg()
p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: int64(int8(b.AuxInt))}}
case ssa.BlockS390XCLGIJ, ssa.BlockS390XCLIJ:
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible
p.Reg = b.Controls[0].Reg()
p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: int64(uint8(b.AuxInt))}}
default:
b.Fatalf("branch not implemented: %s", b.LongString())
}
if next != succs[1] {
s.Br(s390x.ABR, succs[1])
}
}
......@@ -52,7 +52,8 @@ type Block struct {
Controls [2]*Value
// Auxiliary info for the block. Its value depends on the Kind.
Aux interface{}
Aux interface{}
AuxInt int64
// The unordered set of Values that define the operation of this block.
// After the scheduling pass, this list is ordered.
......@@ -118,7 +119,17 @@ func (b *Block) String() string {
func (b *Block) LongString() string {
s := b.Kind.String()
if b.Aux != nil {
s += fmt.Sprintf(" %s", b.Aux)
s += fmt.Sprintf(" {%s}", b.Aux)
}
if t := b.Kind.AuxIntType(); t != "" {
switch t {
case "Int8":
s += fmt.Sprintf(" [%v]", int8(b.AuxInt))
case "UInt8":
s += fmt.Sprintf(" [%v]", uint8(b.AuxInt))
default:
s += fmt.Sprintf(" [%v]", b.AuxInt)
}
}
for _, c := range b.ControlValues() {
s += fmt.Sprintf(" %s", c)
......@@ -218,6 +229,7 @@ func (b *Block) Reset(kind BlockKind) {
b.Kind = kind
b.ResetControls()
b.Aux = nil
b.AuxInt = 0
}
// AddEdgeTo adds an edge from block b to block c. Used during building of the
......
......@@ -416,7 +416,7 @@
(ITab (Load ptr mem)) -> (MOVDload ptr mem)
// block rewrites
(If cond yes no) -> (BRC {s390x.NotEqual} (CMPWconst [0] (MOVBZreg <typ.Bool> cond)) yes no)
(If cond yes no) -> (CLIJ {s390x.LessOrGreater} (MOVBZreg <typ.Bool> cond) [0] yes no)
// Write barrier.
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
......@@ -548,15 +548,60 @@
-> x
// Fold boolean tests into blocks.
(BRC {c} (CMPWconst [0] (LOCGR {d} (MOVDconst [0]) (MOVDconst [x]) cmp)) yes no)
&& x != 0
&& c.(s390x.CCMask) == s390x.Equal
-> (BRC {d} cmp no yes)
(BRC {c} (CMPWconst [0] (LOCGR {d} (MOVDconst [0]) (MOVDconst [x]) cmp)) yes no)
&& x != 0
&& c.(s390x.CCMask) == s390x.NotEqual
// Note: this must match If statement lowering.
(CLIJ {s390x.LessOrGreater} (LOCGR {d} (MOVDconst [0]) (MOVDconst [x]) cmp) [0] yes no)
&& int32(x) != 0
-> (BRC {d} cmp yes no)
// Compare-and-branch.
// Note: bit 3 (unordered) must not be set so we mask out s390x.Unordered.
(BRC {c} (CMP x y) yes no) -> (CGRJ {c.(s390x.CCMask)&^s390x.Unordered} x y yes no)
(BRC {c} (CMPW x y) yes no) -> (CRJ {c.(s390x.CCMask)&^s390x.Unordered} x y yes no)
(BRC {c} (CMPU x y) yes no) -> (CLGRJ {c.(s390x.CCMask)&^s390x.Unordered} x y yes no)
(BRC {c} (CMPWU x y) yes no) -> (CLRJ {c.(s390x.CCMask)&^s390x.Unordered} x y yes no)
// Compare-and-branch (immediate).
// Note: bit 3 (unordered) must not be set so we mask out s390x.Unordered.
(BRC {c} (CMPconst x [y]) yes no) && is8Bit(y) -> (CGIJ {c.(s390x.CCMask)&^s390x.Unordered} x [int64(int8(y))] yes no)
(BRC {c} (CMPWconst x [y]) yes no) && is8Bit(y) -> (CIJ {c.(s390x.CCMask)&^s390x.Unordered} x [int64(int8(y))] yes no)
(BRC {c} (CMPUconst x [y]) yes no) && isU8Bit(y) -> (CLGIJ {c.(s390x.CCMask)&^s390x.Unordered} x [int64(int8(y))] yes no)
(BRC {c} (CMPWUconst x [y]) yes no) && isU8Bit(y) -> (CLIJ {c.(s390x.CCMask)&^s390x.Unordered} x [int64(int8(y))] yes no)
// Absorb immediate into compare-and-branch.
(C(R|GR)J {c} x (MOVDconst [y]) yes no) && is8Bit(y) -> (C(I|GI)J {c} x [int64(int8(y))] yes no)
(CL(R|GR)J {c} x (MOVDconst [y]) yes no) && isU8Bit(y) -> (CL(I|GI)J {c} x [int64(int8(y))] yes no)
(C(R|GR)J {c} (MOVDconst [x]) y yes no) && is8Bit(x) -> (C(I|GI)J {c.(s390x.CCMask).ReverseComparison()} y [int64(int8(x))] yes no)
(CL(R|GR)J {c} (MOVDconst [x]) y yes no) && isU8Bit(x) -> (CL(I|GI)J {c.(s390x.CCMask).ReverseComparison()} y [int64(int8(x))] yes no)
// Prefer comparison with immediate to compare-and-branch.
(CGRJ {c} x (MOVDconst [y]) yes no) && !is8Bit(y) && is32Bit(y) -> (BRC {c} (CMPconst x [int64(int32(y))]) yes no)
(CRJ {c} x (MOVDconst [y]) yes no) && !is8Bit(y) && is32Bit(y) -> (BRC {c} (CMPWconst x [int64(int32(y))]) yes no)
(CLGRJ {c} x (MOVDconst [y]) yes no) && !isU8Bit(y) && isU32Bit(y) -> (BRC {c} (CMPUconst x [int64(int32(y))]) yes no)
(CLRJ {c} x (MOVDconst [y]) yes no) && !isU8Bit(y) && isU32Bit(y) -> (BRC {c} (CMPWUconst x [int64(int32(y))]) yes no)
(CGRJ {c} (MOVDconst [x]) y yes no) && !is8Bit(x) && is32Bit(x) -> (BRC {c.(s390x.CCMask).ReverseComparison()} (CMPconst y [int64(int32(x))]) yes no)
(CRJ {c} (MOVDconst [x]) y yes no) && !is8Bit(x) && is32Bit(x) -> (BRC {c.(s390x.CCMask).ReverseComparison()} (CMPWconst y [int64(int32(x))]) yes no)
(CLGRJ {c} (MOVDconst [x]) y yes no) && !isU8Bit(x) && isU32Bit(x) -> (BRC {c.(s390x.CCMask).ReverseComparison()} (CMPUconst y [int64(int32(x))]) yes no)
(CLRJ {c} (MOVDconst [x]) y yes no) && !isU8Bit(x) && isU32Bit(x) -> (BRC {c.(s390x.CCMask).ReverseComparison()} (CMPWUconst y [int64(int32(x))]) yes no)
// Absorb sign/zero extensions into 32-bit compare-and-branch.
(CIJ {c} (MOV(W|WZ)reg x) [y] yes no) -> (CIJ {c} x [y] yes no)
(CLIJ {c} (MOV(W|WZ)reg x) [y] yes no) -> (CLIJ {c} x [y] yes no)
// Bring out-of-range signed immediates into range by varying branch condition.
(BRC {s390x.Less} (CMPconst x [ 128]) yes no) -> (CGIJ {s390x.LessOrEqual} x [ 127] yes no)
(BRC {s390x.Less} (CMPWconst x [ 128]) yes no) -> (CIJ {s390x.LessOrEqual} x [ 127] yes no)
(BRC {s390x.LessOrEqual} (CMPconst x [-129]) yes no) -> (CGIJ {s390x.Less} x [-128] yes no)
(BRC {s390x.LessOrEqual} (CMPWconst x [-129]) yes no) -> (CIJ {s390x.Less} x [-128] yes no)
(BRC {s390x.Greater} (CMPconst x [-129]) yes no) -> (CGIJ {s390x.GreaterOrEqual} x [-128] yes no)
(BRC {s390x.Greater} (CMPWconst x [-129]) yes no) -> (CIJ {s390x.GreaterOrEqual} x [-128] yes no)
(BRC {s390x.GreaterOrEqual} (CMPconst x [ 128]) yes no) -> (CGIJ {s390x.Greater} x [ 127] yes no)
(BRC {s390x.GreaterOrEqual} (CMPWconst x [ 128]) yes no) -> (CIJ {s390x.Greater} x [ 127] yes no)
// Bring out-of-range unsigned immediates into range by varying branch condition.
// Note: int64(int8(255)) == -1
(BRC {s390x.Less} (CMP(WU|U)const x [256]) yes no) -> (C(L|LG)IJ {s390x.LessOrEqual} x [-1] yes no)
(BRC {s390x.GreaterOrEqual} (CMP(WU|U)const x [256]) yes no) -> (C(L|LG)IJ {s390x.Greater} x [-1] yes no)
// Fold constants into instructions.
(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
(ADDW x (MOVDconst [c])) -> (ADDWconst [int64(int32(c))] x)
......@@ -959,6 +1004,40 @@
(CMPWconst (ANDWconst _ [m]) [n]) && int32(m) >= 0 && int32(m) < int32(n) -> (FlagLT)
(CMPWUconst (ANDWconst _ [m]) [n]) && uint32(m) < uint32(n) -> (FlagLT)
// Constant compare-and-branch with immediate.
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Equal != 0 && int64(x) == int64( int8(y)) -> (First yes no)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Less != 0 && int64(x) < int64( int8(y)) -> (First yes no)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Greater != 0 && int64(x) > int64( int8(y)) -> (First yes no)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Equal != 0 && int32(x) == int32( int8(y)) -> (First yes no)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Less != 0 && int32(x) < int32( int8(y)) -> (First yes no)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Greater != 0 && int32(x) > int32( int8(y)) -> (First yes no)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Equal != 0 && uint64(x) == uint64(uint8(y)) -> (First yes no)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Less != 0 && uint64(x) < uint64(uint8(y)) -> (First yes no)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Greater != 0 && uint64(x) > uint64(uint8(y)) -> (First yes no)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Equal != 0 && uint32(x) == uint32(uint8(y)) -> (First yes no)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Less != 0 && uint32(x) < uint32(uint8(y)) -> (First yes no)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Greater != 0 && uint32(x) > uint32(uint8(y)) -> (First yes no)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Equal == 0 && int64(x) == int64( int8(y)) -> (First no yes)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Less == 0 && int64(x) < int64( int8(y)) -> (First no yes)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Greater == 0 && int64(x) > int64( int8(y)) -> (First no yes)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Equal == 0 && int32(x) == int32( int8(y)) -> (First no yes)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Less == 0 && int32(x) < int32( int8(y)) -> (First no yes)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Greater == 0 && int32(x) > int32( int8(y)) -> (First no yes)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Equal == 0 && uint64(x) == uint64(uint8(y)) -> (First no yes)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Less == 0 && uint64(x) < uint64(uint8(y)) -> (First no yes)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Greater == 0 && uint64(x) > uint64(uint8(y)) -> (First no yes)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Equal == 0 && uint32(x) == uint32(uint8(y)) -> (First no yes)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Less == 0 && uint32(x) < uint32(uint8(y)) -> (First no yes)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c.(s390x.CCMask)&s390x.Greater == 0 && uint32(x) > uint32(uint8(y)) -> (First no yes)
// Constant compare-and-branch with immediate when unsigned comparison with zero.
(C(L|LG)IJ {s390x.GreaterOrEqual} _ [0] yes no) -> (First yes no)
(C(L|LG)IJ {s390x.Less} _ [0] yes no) -> (First no yes)
// Constant compare-and-branch when operands match.
(C(GR|R|LGR|LR)J {c} x y yes no) && x == y && c.(s390x.CCMask)&s390x.Equal != 0 -> (First yes no)
(C(GR|R|LGR|LR)J {c} x y yes no) && x == y && c.(s390x.CCMask)&s390x.Equal == 0 -> (First no yes)
// Convert 64-bit comparisons to 32-bit comparisons and signed comparisons
// to unsigned comparisons.
// Helps simplify constant comparison detection.
......
......@@ -707,8 +707,41 @@ func init() {
},
}
// All blocks on s390x have their condition code mask (s390x.CCMask) as the Aux value.
// The condition code mask is a 4-bit mask where each bit corresponds to a condition
// code value. If the value of the condition code matches a bit set in the condition
// code mask then the first successor is executed. Otherwise the second successor is
// executed.
//
// | condition code value | mask bit |
// +----------------------+------------+
// | 0 (equal) | 0b1000 (8) |
// | 1 (less than) | 0b0100 (4) |
// | 2 (greater than) | 0b0010 (2) |
// | 3 (unordered) | 0b0001 (1) |
//
// Note that compare-and-branch instructions must not have bit 3 (0b0001) set.
var S390Xblocks = []blockData{
{name: "BRC", controls: 1}, // aux is condition code mask (s390x.CCMask)
// branch on condition
{name: "BRC", controls: 1}, // condition code value (flags) is Controls[0]
// compare-and-branch (register-register)
// - integrates comparison of Controls[0] with Controls[1]
// - both control values must be in general purpose registers
{name: "CRJ", controls: 2}, // signed 32-bit integer comparison
{name: "CGRJ", controls: 2}, // signed 64-bit integer comparison
{name: "CLRJ", controls: 2}, // unsigned 32-bit integer comparison
{name: "CLGRJ", controls: 2}, // unsigned 64-bit integer comparison
// compare-and-branch (register-immediate)
// - integrates comparison of Controls[0] with AuxInt
// - control value must be in a general purpose register
// - the AuxInt value is sign-extended for signed comparisons
// and zero-extended for unsigned comparisons
{name: "CIJ", controls: 1, auxint: "Int8"}, // signed 32-bit integer comparison
{name: "CGIJ", controls: 1, auxint: "Int8"}, // signed 64-bit integer comparison
{name: "CLIJ", controls: 1, auxint: "UInt8"}, // unsigned 32-bit integer comparison
{name: "CLGIJ", controls: 1, auxint: "UInt8"}, // unsigned 64-bit integer comparison
}
archs = append(archs, arch{
......
......@@ -70,6 +70,7 @@ type opData struct {
// blockData describes one kind of block: its name, how many control
// values it takes and, optionally, the type of its AuxInt value.
type blockData struct {
name string // the suffix for this block ("EQ", "LT", etc.)
controls int // the number of control values this type of block requires
auxint string // the type of the AuxInt value, if any (e.g. "Int8" or "UInt8"); empty when the block has no AuxInt
}
type regInfo struct {
......@@ -219,6 +220,19 @@ func genOp() {
fmt.Fprintln(w, "}")
fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}")
// generate block kind auxint method
fmt.Fprintln(w, "var blockAuxIntType = [...]string{")
for _, a := range archs {
for _, b := range a.blocks {
if b.auxint == "" {
continue
}
fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.auxint)
}
}
fmt.Fprintln(w, "}")
fmt.Fprintln(w, "func (k BlockKind) AuxIntType() string {return blockAuxIntType[k]}")
// generate Op* declarations
fmt.Fprintln(w, "const (")
fmt.Fprintln(w, "OpInvalid Op = iota") // make sure OpInvalid is 0.
......
......@@ -749,7 +749,7 @@ func breakf(format string, a ...interface{}) *CondBreak {
func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
rr := &RuleRewrite{loc: rule.loc}
rr.match, rr.cond, rr.result = rule.parse()
_, _, _, aux, s := extract(rr.match) // remove parens, then split
_, _, auxint, aux, s := extract(rr.match) // remove parens, then split
// check match of control values
if len(s) < data.controls {
......@@ -781,15 +781,28 @@ func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
pos[i] = arg + ".Pos"
}
}
if aux != "" {
rr.add(declf(aux, "b.Aux"))
for _, e := range []struct {
name, field string
}{
{auxint, "AuxInt"},
{aux, "Aux"},
} {
if e.name == "" {
continue
}
if !token.IsIdentifier(e.name) || rr.declared(e.name) {
// code or variable
rr.add(breakf("b.%s != %s", e.field, e.name))
} else {
rr.add(declf(e.name, "b.%s", e.field))
}
}
if rr.cond != "" {
rr.add(breakf("!(%s)", rr.cond))
}
// Rule matches. Generate result.
outop, _, _, aux, t := extract(rr.result) // remove parens, then split
outop, _, auxint, aux, t := extract(rr.result) // remove parens, then split
_, outdata := getBlockInfo(outop, arch)
if len(t) < outdata.controls {
log.Fatalf("incorrect number of output arguments in %s, got %v wanted at least %v", rule, len(s), outdata.controls)
......@@ -832,6 +845,9 @@ func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
v := genResult0(rr, arch, control, false, false, newpos)
rr.add(stmtf("b.AddControl(%s)", v))
}
if auxint != "" {
rr.add(stmtf("b.AuxInt = %s", auxint))
}
if aux != "" {
rr.add(stmtf("b.Aux = %s", aux))
}
......
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/compile/internal/types"
"cmd/internal/src"
"testing"
)
// TestLoopConditionS390X compiles a hand-built SSA function containing a
// counted loop and then checks how many times selected s390x opcodes
// appear in the result.
func TestLoopConditionS390X(t *testing.T) {
// Test that a simple loop condition does not generate a conditional
// move (issue #19227).
//
// MOVDLT is generated when Less64 is lowered but should be
// optimized into an LT branch.
// NOTE(review): the assertions at the end count LOCGR rather than
// MOVDLT; presumably MOVDLT is an older name for that op - confirm.
//
// For example, compiling the following loop:
//
// for i := 0; i < N; i++ {
// sum += 3
// }
//
// should generate assembly similar to:
// loop:
// CMP R0, R1
// BGE done
// ADD $3, R4
// ADD $1, R1
// BR loop
// done:
//
// rather than:
// loop:
// MOVD $0, R2
// MOVD $1, R3
// CMP R0, R1
// LOCGR $(8+2) R2, R3
// CMPW R2, $0
// BNE done
// ADD $3, R4
// ADD $1, R1
// BR loop
// done:
//
c := testConfigS390X(t)
// a is the auto variable used as the aux of the VarDef in block b3.
a := c.Frontend().Auto(src.NoXPos, c.config.Types.Int8)
fun := c.Fun("entry",
// entry: initial memory, SP, the address of the result, the loop
// bound N and the zero starting values for i and sum.
Bloc("entry",
Valu("mem", OpInitMem, types.TypeMem, 0, nil),
Valu("SP", OpSP, c.config.Types.Uintptr, 0, nil),
Valu("ret", OpLocalAddr, c.config.Types.Int64.PtrTo(), 0, nil, "SP", "mem"),
Valu("N", OpArg, c.config.Types.Int64, 0, c.Frontend().Auto(src.NoXPos, c.config.Types.Int64)),
Valu("starti", OpConst64, c.config.Types.Int64, 0, nil),
Valu("startsum", OpConst64, c.config.Types.Int64, 0, nil),
Goto("b1")),
// b1: loop header. Phis merge the initial and updated values of
// i and sum; the block branches on i < N.
Bloc("b1",
Valu("phii", OpPhi, c.config.Types.Int64, 0, nil, "starti", "i"),
Valu("phisum", OpPhi, c.config.Types.Int64, 0, nil, "startsum", "sum"),
Valu("cmp1", OpLess64, c.config.Types.Bool, 0, nil, "phii", "N"),
If("cmp1", "b2", "b3")),
// b2: loop body. Adds 1 to i and 3 to sum, then jumps back to b1.
Bloc("b2",
Valu("c1", OpConst64, c.config.Types.Int64, 1, nil),
Valu("i", OpAdd64, c.config.Types.Int64, 0, nil, "phii", "c1"),
Valu("c3", OpConst64, c.config.Types.Int64, 3, nil),
Valu("sum", OpAdd64, c.config.Types.Int64, 0, nil, "phisum", "c3"),
Goto("b1")),
// b3: loop exit. Stores the final sum through ret and exits.
Bloc("b3",
Valu("retdef", OpVarDef, types.TypeMem, 0, a, "mem"),
Valu("store", OpStore, types.TypeMem, 0, c.config.Types.Int64, "ret", "phisum", "retdef"),
Exit("store")))
// Validate the function both before and after compilation.
CheckFunc(fun.f)
Compile(fun.f)
CheckFunc(fun.f)
// The compiled function must contain exactly one CMP and neither a
// LOCGR nor a CMPWconst.
checkOpcodeCounts(t, fun.f, map[Op]int{
OpS390XLOCGR: 0,
OpS390XCMP: 1,
OpS390XCMPWconst: 0,
})
}
......@@ -112,6 +112,14 @@ const (
BlockPPC64FGE
BlockS390XBRC
BlockS390XCRJ
BlockS390XCGRJ
BlockS390XCLRJ
BlockS390XCLGRJ
BlockS390XCIJ
BlockS390XCGIJ
BlockS390XCLIJ
BlockS390XCLGIJ
BlockPlain
BlockIf
......@@ -220,7 +228,15 @@ var blockString = [...]string{
BlockPPC64FGT: "FGT",
BlockPPC64FGE: "FGE",
BlockS390XBRC: "BRC",
BlockS390XBRC: "BRC",
BlockS390XCRJ: "CRJ",
BlockS390XCGRJ: "CGRJ",
BlockS390XCLRJ: "CLRJ",
BlockS390XCLGRJ: "CLGRJ",
BlockS390XCIJ: "CIJ",
BlockS390XCGIJ: "CGIJ",
BlockS390XCLIJ: "CLIJ",
BlockS390XCLGIJ: "CLGIJ",
BlockPlain: "Plain",
BlockIf: "If",
......@@ -233,6 +249,15 @@ var blockString = [...]string{
func (k BlockKind) String() string { return blockString[k] }
// blockAuxIntType maps each block kind that carries an AuxInt value to the
// name of that value's type. Kinds without an AuxInt have no entry, so the
// array is only as long as the highest-numbered kind listed here.
var blockAuxIntType = [...]string{
	BlockS390XCIJ:   "Int8",
	BlockS390XCGIJ:  "Int8",
	BlockS390XCLIJ:  "UInt8",
	BlockS390XCLGIJ: "UInt8",
}

// AuxIntType returns the name of the type of the AuxInt value for blocks of
// kind k, or "" if blocks of that kind have no AuxInt.
//
// Block kinds numbered after the last table entry (for example BlockPlain
// and BlockIf) lie outside the array, so k is range-checked before it is
// used as an index; indexing directly would panic for those kinds.
//
// NOTE(review): this definition appears to be emitted by the op generator;
// the same guard presumably needs to be produced there - confirm.
func (k BlockKind) AuxIntType() string {
	if i := int(k); i < 0 || i >= len(blockAuxIntType) {
		return ""
	}
	return blockAuxIntType[k]
}
const (
OpInvalid Op = iota
......
......@@ -404,6 +404,16 @@ func is16Bit(n int64) bool {
return n == int64(int16(n))
}
// is8Bit reports whether n lies in the range of a signed 8 bit integer,
// that is -128 <= n <= 127.
func is8Bit(n int64) bool {
	const lo, hi = -1 << 7, 1<<7 - 1
	return lo <= n && n <= hi
}
// isU8Bit reports whether n lies in the range of an unsigned 8 bit integer,
// that is 0 <= n <= 255.
func isU8Bit(n int64) bool {
	return 0 <= n && n < (1<<8)
}
// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
return 0 <= n && n < (1<<12)
......
......@@ -23,7 +23,7 @@ func TestSizeof(t *testing.T) {
_64bit uintptr // size on 64bit platforms
}{
{Value{}, 72, 112},
{Block{}, 156, 296},
{Block{}, 164, 304},
{LocalSlot{}, 32, 48},
{valState{}, 28, 40},
}
......
// asmcheck
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// dummy is an empty function that is never inlined. The loops in this file
// call it as their body.
//
//go:noinline
func dummy() {}
// si64 checks signed 64-bit register-register compare-and-branch.
// The less-than loop must use CGRJ with mask 4 (less than). The equality
// loop must use mask 8 (equal) and may use either CGRJ or CLGRJ.
func si64(x, y chan int64) {
// s390x:"CGRJ\t[$]4, R[0-9]+, R[0-9]+, "
for <-x < <-y {
dummy()
}
// s390x:"CL?GRJ\t[$]8, R[0-9]+, R[0-9]+, "
for <-x == <-y {
dummy()
}
}
// si64x8 checks signed 64-bit compare-and-branch with an 8-bit immediate.
// Each loop bound (128 or -129) is one step outside the signed 8-bit range,
// so the expected instruction compares against 127 or -128 with an adjusted
// condition mask instead.
func si64x8() {
// i < 128 is expected as i <= 127 (mask 12, less or equal).
// s390x:"CGIJ\t[$]12, R[0-9]+, [$]127, "
for i := int64(0); i < 128; i++ {
dummy()
}
// i > -129 is expected as i >= -128 (mask 10, greater or equal).
// s390x:"CGIJ\t[$]10, R[0-9]+, [$]-128, "
for i := int64(0); i > -129; i-- {
dummy()
}
// i >= 128 is expected as i > 127 (mask 2, greater).
// s390x:"CGIJ\t[$]2, R[0-9]+, [$]127, "
for i := int64(0); i >= 128; i++ {
dummy()
}
// i <= -129 is expected as i < -128 (mask 4, less).
// s390x:"CGIJ\t[$]4, R[0-9]+, [$]-128, "
for i := int64(0); i <= -129; i-- {
dummy()
}
}
// ui64 checks unsigned 64-bit register-register compare-and-branch.
// The greater-than loop must use CLGRJ with mask 2 (greater than). The
// inequality loop must use mask 6 (less or greater) and may use either
// CGRJ or CLGRJ.
func ui64(x, y chan uint64) {
// s390x:"CLGRJ\t[$]2, R[0-9]+, R[0-9]+, "
for <-x > <-y {
dummy()
}
// s390x:"CL?GRJ\t[$]6, R[0-9]+, R[0-9]+, "
for <-x != <-y {
dummy()
}
}
// ui64x8 checks unsigned 64-bit compare-and-branch with an 8-bit immediate.
// Bounds that fit in an unsigned 8-bit value are expected unchanged; the
// bound 256 is expected to be rewritten as a comparison against 255 with an
// adjusted condition mask.
func ui64x8() {
// 128 fits as an unsigned 8-bit immediate (mask 4, less).
// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]128, "
for i := uint64(0); i < 128; i++ {
dummy()
}
// i < 256 is expected as i <= 255 (mask 12, less or equal).
// s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255, "
for i := uint64(0); i < 256; i++ {
dummy()
}
// i >= 256 is expected as i > 255 (mask 2, greater).
// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255, "
for i := uint64(0); i >= 256; i-- {
dummy()
}
// i > 0 is expected as a direct comparison with 0 (mask 2, greater).
// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]0, "
for i := uint64(1024); i > 0; i-- {
dummy()
}
}
// si32 checks signed 32-bit register-register compare-and-branch.
// The less-than loop must use CRJ with mask 4 (less than). The equality
// loop must use mask 8 (equal) and may use either CRJ or CLRJ.
func si32(x, y chan int32) {
// s390x:"CRJ\t[$]4, R[0-9]+, R[0-9]+, "
for <-x < <-y {
dummy()
}
// s390x:"CL?RJ\t[$]8, R[0-9]+, R[0-9]+, "
for <-x == <-y {
dummy()
}
}
// si32x8 checks signed 32-bit compare-and-branch with an 8-bit immediate.
// Each loop bound (128 or -129) is one step outside the signed 8-bit range,
// so the expected instruction compares against 127 or -128 with an adjusted
// condition mask instead.
func si32x8() {
// i < 128 is expected as i <= 127 (mask 12, less or equal).
// s390x:"CIJ\t[$]12, R[0-9]+, [$]127, "
for i := int32(0); i < 128; i++ {
dummy()
}
// i > -129 is expected as i >= -128 (mask 10, greater or equal).
// s390x:"CIJ\t[$]10, R[0-9]+, [$]-128, "
for i := int32(0); i > -129; i-- {
dummy()
}
// i >= 128 is expected as i > 127 (mask 2, greater).
// s390x:"CIJ\t[$]2, R[0-9]+, [$]127, "
for i := int32(0); i >= 128; i++ {
dummy()
}
// i <= -129 is expected as i < -128 (mask 4, less).
// s390x:"CIJ\t[$]4, R[0-9]+, [$]-128, "
for i := int32(0); i <= -129; i-- {
dummy()
}
}
// ui32 checks unsigned 32-bit register-register compare-and-branch.
// The greater-than loop must use CLRJ with mask 2 (greater than). The
// inequality loop must use mask 6 (less or greater) and may use either
// CRJ or CLRJ.
func ui32(x, y chan uint32) {
// s390x:"CLRJ\t[$]2, R[0-9]+, R[0-9]+, "
for <-x > <-y {
dummy()
}
// s390x:"CL?RJ\t[$]6, R[0-9]+, R[0-9]+, "
for <-x != <-y {
dummy()
}
}
// ui32x8 checks unsigned 32-bit compare-and-branch with an 8-bit immediate.
// Bounds that fit in an unsigned 8-bit value are expected unchanged; the
// bound 256 is expected to be rewritten as a comparison against 255 with an
// adjusted condition mask.
func ui32x8() {
// 128 fits as an unsigned 8-bit immediate (mask 4, less).
// s390x:"CLIJ\t[$]4, R[0-9]+, [$]128, "
for i := uint32(0); i < 128; i++ {
dummy()
}
// i < 256 is expected as i <= 255 (mask 12, less or equal).
// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255, "
for i := uint32(0); i < 256; i++ {
dummy()
}
// i >= 256 is expected as i > 255 (mask 2, greater).
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255, "
for i := uint32(0); i >= 256; i-- {
dummy()
}
// i > 0 is expected as a direct comparison with 0 (mask 2, greater).
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]0, "
for i := uint32(1024); i > 0; i-- {
dummy()
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment