Commit 38c4a737 authored by Michael Munday's avatar Michael Munday

cmd/asm: add s390x branch-on-count instructions

The branch-on-count instructions on s390x decrement the input
register and then compare its value to 0. If not equal the branch
is taken.

These instructions are useful for implementing loops with a set
number of iterations (which might be in a register).

For example, this for loop:

	for i := 0; i < n; i++ {
		... // i is not used or modified in the loop
	}

Could be implemented using this assembly:

	MOVD  Rn, Ri
loop:
	...
	BRCTG Ri, loop

Note that i will count down from n in the assembly whereas in the
original for loop it counted up to n which is why we can't use i
in the loop.

These instructions will only be used in hand-written codegen and
assembly for now since SSA blocks cannot currently modify values.
We could look into this in the future though.

Change-Id: Iaab93b8aa2699513b825439b8ea20d8fe2ea1ee6
Reviewed-on: https://go-review.googlesource.com/c/go/+/199977
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent ff86ce13
...@@ -30,6 +30,8 @@ func jumpS390x(word string) bool { ...@@ -30,6 +30,8 @@ func jumpS390x(word string) bool {
"BR", "BR",
"BVC", "BVC",
"BVS", "BVS",
"BRCT",
"BRCTG",
"CMPBEQ", "CMPBEQ",
"CMPBGE", "CMPBGE",
"CMPBGT", "CMPBGT",
......
...@@ -266,6 +266,9 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- ...@@ -266,6 +266,9 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
BLTU 0(PC) // a7540000 BLTU 0(PC) // a7540000
BLEU 0(PC) // a7d40000 BLEU 0(PC) // a7d40000
BRCT R1, 0(PC) // a7160000
BRCTG R2, 0(PC) // a7270000
CMPBNE R1, R2, 0(PC) // ec1200007064 CMPBNE R1, R2, 0(PC) // ec1200007064
CMPBEQ R3, R4, 0(PC) // ec3400008064 CMPBEQ R3, R4, 0(PC) // ec3400008064
CMPBLT R5, R6, 0(PC) // ec5600004064 CMPBLT R5, R6, 0(PC) // ec5600004064
......
...@@ -38,18 +38,14 @@ func zerorange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog { ...@@ -38,18 +38,14 @@ func zerorange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
// Generate a loop of large clears. // Generate a loop of large clears.
if cnt > clearLoopCutoff { if cnt > clearLoopCutoff {
n := cnt - (cnt % 256) ireg := int16(s390x.REGRT2) // register holds number of remaining loop iterations
end := int16(s390x.REGRT2) p = pp.Appendpp(p, s390x.AMOVD, obj.TYPE_CONST, 0, cnt/256, obj.TYPE_REG, ireg, 0)
p = pp.Appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, off+n, obj.TYPE_REG, end, 0)
p.Reg = reg
p = pp.Appendpp(p, s390x.ACLEAR, obj.TYPE_CONST, 0, 256, obj.TYPE_MEM, reg, off) p = pp.Appendpp(p, s390x.ACLEAR, obj.TYPE_CONST, 0, 256, obj.TYPE_MEM, reg, off)
pl := p pl := p
p = pp.Appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, 256, obj.TYPE_REG, reg, 0) p = pp.Appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, 256, obj.TYPE_REG, reg, 0)
p = pp.Appendpp(p, s390x.ACMP, obj.TYPE_REG, reg, 0, obj.TYPE_REG, end, 0) p = pp.Appendpp(p, s390x.ABRCTG, obj.TYPE_REG, ireg, 0, obj.TYPE_BRANCH, 0, 0)
p = pp.Appendpp(p, s390x.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
gc.Patch(p, pl) gc.Patch(p, pl)
cnt = cnt % 256
cnt -= n
} }
// Generate remaining clear instructions without a loop. // Generate remaining clear instructions without a loop.
......
...@@ -409,6 +409,10 @@ const ( ...@@ -409,6 +409,10 @@ const (
ABVS ABVS
ASYSCALL ASYSCALL
// branch on count
ABRCT
ABRCTG
// compare and branch // compare and branch
ACRJ ACRJ
ACGRJ ACGRJ
......
...@@ -155,6 +155,8 @@ var Anames = []string{ ...@@ -155,6 +155,8 @@ var Anames = []string{
"BVC", "BVC",
"BVS", "BVS",
"SYSCALL", "SYSCALL",
"BRCT",
"BRCTG",
"CRJ", "CRJ",
"CGRJ", "CGRJ",
"CLRJ", "CLRJ",
......
...@@ -256,6 +256,10 @@ var optab = []Optab{ ...@@ -256,6 +256,10 @@ var optab = []Optab{
{i: 90, as: ACLGIJ, a1: C_SCON, a2: C_REG, a3: C_ADDCON, a6: C_SBRA}, {i: 90, as: ACLGIJ, a1: C_SCON, a2: C_REG, a3: C_ADDCON, a6: C_SBRA},
{i: 90, as: ACMPUBEQ, a1: C_REG, a3: C_ANDCON, a6: C_SBRA}, {i: 90, as: ACMPUBEQ, a1: C_REG, a3: C_ANDCON, a6: C_SBRA},
// branch on count
{i: 41, as: ABRCT, a1: C_REG, a6: C_SBRA},
{i: 41, as: ABRCTG, a1: C_REG, a6: C_SBRA},
// move on condition // move on condition
{i: 17, as: AMOVDEQ, a1: C_REG, a6: C_REG}, {i: 17, as: AMOVDEQ, a1: C_REG, a6: C_REG},
...@@ -3394,6 +3398,21 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { ...@@ -3394,6 +3398,21 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
*asm = append(*asm, uint8(wd)) *asm = append(*asm, uint8(wd))
} }
case 41: // branch on count
r1 := p.From.Reg
ri2 := (p.Pcond.Pc - p.Pc) >> 1
if int64(int16(ri2)) != ri2 {
c.ctxt.Diag("branch target too far away")
}
var opcode uint32
switch p.As {
case ABRCT:
opcode = op_BRCT
case ABRCTG:
opcode = op_BRCTG
}
zRI(opcode, uint32(r1), uint32(ri2), asm)
case 47: // negate [reg] reg case 47: // negate [reg] reg
r := p.From.Reg r := p.From.Reg
if r == 0 { if r == 0 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment