Commit 9248ff46 authored by Lynn Boger

cmd/compile: add rotates to PPC64.rules

This updates PPC64.rules to include rules that generate rotates
for ADD, OR, and XOR operators that combine a constant left shift
and a constant right shift of the same value whose shift counts
sum to 32 or 64.

To support this change, opcodes for ROTL and ROTLW were added, to
be used like the rotldi and rotlwi extended mnemonics.

This provides the following improvement in sha3 (columns: old, new, new/old ratio):

BenchmarkPermutationFunction-8     302.83       376.40       1.24x
BenchmarkSha3_512_MTU-8            98.64        121.92       1.24x
BenchmarkSha3_384_MTU-8            136.80       168.30       1.23x
BenchmarkSha3_256_MTU-8            169.21       211.29       1.25x
BenchmarkSha3_224_MTU-8            179.76       221.19       1.23x
BenchmarkShake128_MTU-8            212.87       263.23       1.24x
BenchmarkShake256_MTU-8            196.62       245.60       1.25x
BenchmarkShake256_16x-8            163.57       194.37       1.19x
BenchmarkShake256_1MiB-8           199.02       248.74       1.25x
BenchmarkSha3_512_1MiB-8           106.55       133.13       1.25x

Fixes #20030

Change-Id: I484c56f48395d32f53ff3ecb3ac6cb8191cfee44
Reviewed-on: https://go-review.googlesource.com/40992
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 865b50c9
......@@ -581,6 +581,15 @@ label1:
// cmpb RA,RS,RB
CMPB R2,R2,R1
//
// rotate extended mnemonics map onto other shift instructions
//
ROTL $12,R2,R3
ROTL R2,R3,R4
ROTLW $9,R2,R3
ROTLW R2,R3,R4
//
// rotate and mask
//
......@@ -617,6 +626,17 @@ label1:
RLDIMI $7, R2, $52, R7
// opcodes for right and left shifts, const and reg shift counts
SLD $4, R3, R4
SLD R2, R3, R4
SLW $4, R3, R4
SLW R2, R3, R4
SRD $8, R3, R4
SRD R2, R3, R4
SRW $8, R3, R4
SRW R2, R3, R4
//
// load/store multiple
//
......
......@@ -1543,6 +1543,54 @@ var linuxPPC64LETests = []*asmTest{
`,
[]string{"\tFMSUBS\t"},
},
{
`
func f4(x uint32) uint32 {
return x<<7 | x>>25
}
`,
[]string{"\tROTLW\t"},
},
{
`
func f5(x uint32) uint32 {
return x<<7 + x>>25
}
`,
[]string{"\tROTLW\t"},
},
{
`
func f6(x uint32) uint32 {
return x<<7 ^ x>>25
}
`,
[]string{"\tROTLW\t"},
},
{
`
func f7(x uint64) uint64 {
return x<<7 | x>>57
}
`,
[]string{"\tROTL\t"},
},
{
`
func f8(x uint64) uint64 {
return x<<7 + x>>57
}
`,
[]string{"\tROTL\t"},
},
{
`
func f9(x uint64) uint64 {
return x<<7 ^ x>>57
}
`,
[]string{"\tROTL\t"},
},
}
// TestLineNumber checks to make sure the generated assembly has line numbers
......
......@@ -554,6 +554,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
r := v.Reg()
r1 := v.Args[0].Reg()
......
......@@ -85,6 +85,15 @@
(ConstNil) -> (MOVDconst [0])
(ConstBool [b]) -> (MOVDconst [b])
// Rotate generation
(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
(ADD (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
( OR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
(XOR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
(Lsh64x64 x (Const64 [c])) && uint64(c) < 64 -> (SLDconst x [c])
(Rsh64x64 x (Const64 [c])) && uint64(c) < 64 -> (SRADconst x [c])
(Rsh64Ux64 x (Const64 [c])) && uint64(c) < 64 -> (SRDconst x [c])
......
......@@ -195,6 +195,9 @@ func init() {
{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"}, // arg0 << aux, 64 bits
{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"}, // arg0 << aux, 32 bits
{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits
{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"}, // arg0/arg1
{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
......
......@@ -1292,6 +1292,8 @@ const (
OpPPC64SRWconst
OpPPC64SLDconst
OpPPC64SLWconst
OpPPC64ROTLconst
OpPPC64ROTLWconst
OpPPC64FDIV
OpPPC64FDIVS
OpPPC64DIVD
......@@ -16537,6 +16539,34 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ROTLconst",
auxType: auxInt64,
argLen: 1,
asm: ppc64.AROTL,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "ROTLWconst",
auxType: auxInt64,
argLen: 1,
asm: ppc64.AROTLW,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "FDIV",
argLen: 2,
......
......@@ -4391,6 +4391,110 @@ func rewriteValuePPC64_OpOrB(v *Value) bool {
}
}
func rewriteValuePPC64_OpPPC64ADD(v *Value) bool {
// match: (ADD (SLDconst x [c]) (SRDconst x [d]))
// cond: d == 64-c
// result: (ROTLconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SLDconst {
break
}
c := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SRDconst {
break
}
d := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 64-c) {
break
}
v.reset(OpPPC64ROTLconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (ADD (SRDconst x [d]) (SLDconst x [c]))
// cond: d == 64-c
// result: (ROTLconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SRDconst {
break
}
d := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SLDconst {
break
}
c := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 64-c) {
break
}
v.reset(OpPPC64ROTLconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (ADD (SLWconst x [c]) (SRWconst x [d]))
// cond: d == 32-c
// result: (ROTLWconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SLWconst {
break
}
c := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SRWconst {
break
}
d := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 32-c) {
break
}
v.reset(OpPPC64ROTLWconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (ADD (SRWconst x [d]) (SLWconst x [c]))
// cond: d == 32-c
// result: (ROTLWconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SRWconst {
break
}
d := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SLWconst {
break
}
c := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 32-c) {
break
}
v.reset(OpPPC64ROTLWconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (ADD x (MOVDconst [c]))
// cond: is32Bit(c)
// result: (ADDconst [c] x)
......@@ -7362,6 +7466,110 @@ func rewriteValuePPC64_OpPPC64NotEqual(v *Value) bool {
return false
}
func rewriteValuePPC64_OpPPC64OR(v *Value) bool {
// match: (OR (SLDconst x [c]) (SRDconst x [d]))
// cond: d == 64-c
// result: (ROTLconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SLDconst {
break
}
c := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SRDconst {
break
}
d := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 64-c) {
break
}
v.reset(OpPPC64ROTLconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (OR (SRDconst x [d]) (SLDconst x [c]))
// cond: d == 64-c
// result: (ROTLconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SRDconst {
break
}
d := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SLDconst {
break
}
c := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 64-c) {
break
}
v.reset(OpPPC64ROTLconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (OR (SLWconst x [c]) (SRWconst x [d]))
// cond: d == 32-c
// result: (ROTLWconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SLWconst {
break
}
c := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SRWconst {
break
}
d := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 32-c) {
break
}
v.reset(OpPPC64ROTLWconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (OR (SRWconst x [d]) (SLWconst x [c]))
// cond: d == 32-c
// result: (ROTLWconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SRWconst {
break
}
d := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SLWconst {
break
}
c := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 32-c) {
break
}
v.reset(OpPPC64ROTLWconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (OR (MOVDconst [c]) (MOVDconst [d]))
// cond:
// result: (MOVDconst [c|d])
......@@ -7521,6 +7729,110 @@ func rewriteValuePPC64_OpPPC64SUB(v *Value) bool {
return false
}
func rewriteValuePPC64_OpPPC64XOR(v *Value) bool {
// match: (XOR (SLDconst x [c]) (SRDconst x [d]))
// cond: d == 64-c
// result: (ROTLconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SLDconst {
break
}
c := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SRDconst {
break
}
d := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 64-c) {
break
}
v.reset(OpPPC64ROTLconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (XOR (SRDconst x [d]) (SLDconst x [c]))
// cond: d == 64-c
// result: (ROTLconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SRDconst {
break
}
d := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SLDconst {
break
}
c := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 64-c) {
break
}
v.reset(OpPPC64ROTLconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (XOR (SLWconst x [c]) (SRWconst x [d]))
// cond: d == 32-c
// result: (ROTLWconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SLWconst {
break
}
c := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SRWconst {
break
}
d := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 32-c) {
break
}
v.reset(OpPPC64ROTLWconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (XOR (SRWconst x [d]) (SLWconst x [c]))
// cond: d == 32-c
// result: (ROTLWconst [c] x)
for {
v_0 := v.Args[0]
if v_0.Op != OpPPC64SRWconst {
break
}
d := v_0.AuxInt
x := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64SLWconst {
break
}
c := v_1.AuxInt
if x != v_1.Args[0] {
break
}
if !(d == 32-c) {
break
}
v.reset(OpPPC64ROTLWconst)
v.AuxInt = c
v.AddArg(x)
return true
}
// match: (XOR (MOVDconst [c]) (MOVDconst [d]))
// cond:
// result: (MOVDconst [c^d])
......
......@@ -680,6 +680,8 @@ const (
ARLDCLCC
ARLDICL
ARLDICLCC
AROTL
AROTLW
ASLBIA
ASLBIE
ASLBMFEE
......
......@@ -301,6 +301,8 @@ var Anames = []string{
"RLDCLCC",
"RLDICL",
"RLDICLCC",
"ROTL",
"ROTLW",
"SLBIA",
"SLBIE",
"SLBMFEE",
......
......@@ -1655,11 +1655,13 @@ func buildop(ctxt *obj.Link) {
opset(ASLWCC, r0)
opset(ASRW, r0)
opset(ASRWCC, r0)
opset(AROTLW, r0)
case ASLD:
opset(ASLDCC, r0)
opset(ASRD, r0)
opset(ASRDCC, r0)
opset(AROTL, r0)
case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */
opset(ASRAWCC, r0)
......@@ -1971,10 +1973,12 @@ const (
OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0
OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0
OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0
OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0
OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0
OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0
OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0
OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0
OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0
)
func oclass(a *obj.Addr) int {
......@@ -2258,7 +2262,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
if r == 0 {
r = int(p.To.Reg)
}
// AROTL and AROTLW are extended mnemonics, which map to RLDCL and RLWNM.
switch p.As {
case AROTL:
o1 = AOP_RLDIC(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0))
case AROTLW:
o1 = OP_RLW(OP_RLWNM, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), 0, 31)
default:
o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
}
case 7: /* mov r, soreg ==> stw o(r) */
r := int(p.To.Reg)
......@@ -2636,32 +2648,28 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
r = int(p.To.Reg)
}
var a int
op := uint32(0)
switch p.As {
case ASLD, ASLDCC:
a = int(63 - v)
o1 = OP_RLDICR
op = OP_RLDICR
case ASRD, ASRDCC:
a = int(v)
v = 64 - v
o1 = OP_RLDICL
op = OP_RLDICL
case AROTL:
a = int(0)
op = OP_RLDICL
default:
c.ctxt.Diag("unexpected op in sldi case\n%v", p)
a = 0
o1 = 0
}
o1 = AOP_RRR(o1, uint32(r), uint32(p.To.Reg), (uint32(v) & 0x1F))
o1 |= (uint32(a) & 31) << 6
if v&0x20 != 0 {
o1 |= 1 << 1
}
if a&0x20 != 0 {
o1 |= 1 << 5 /* mb[5] is top bit */
}
o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
if p.As == ASLDCC || p.As == ASRDCC {
o1 |= 1 /* Rc */
o1 |= 1 // Set the condition code bit
}
case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */
......@@ -2978,18 +2986,18 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
v = 32
}
var mask [2]uint8
if p.As == ASRW || p.As == ASRWCC { /* shift right */
mask[0] = uint8(v)
mask[1] = 31
switch p.As {
case AROTLW:
mask[0], mask[1] = 0, 31
case ASRW, ASRWCC:
mask[0], mask[1] = uint8(v), 31
v = 32 - v
} else {
mask[0] = 0
mask[1] = uint8(31 - v)
default:
mask[0], mask[1] = 0, uint8(31-v)
}
o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(r), uint32(v), uint32(mask[0]), uint32(mask[1]))
if p.As == ASLWCC || p.As == ASRWCC {
o1 |= 1 /* Rc */
o1 |= 1 // set the condition code
}
case 58: /* logical $andcon,[s],a */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment