Commit 23bd9191 authored by Lynn Boger

cmd/compile: improve LoweredZero performance for ppc64x

This change improves the performance of the LoweredZero rule
on ppc64x.

The improvement can be seen in the runtime ClearFat
benchmarks (old ns/op, new ns/op, delta):

BenchmarkClearFat12-16       2.40          0.69          -71.25%
BenchmarkClearFat16-16       9.98          0.93          -90.68%
BenchmarkClearFat24-16       4.75          0.93          -80.42%
BenchmarkClearFat32-16       6.02          0.93          -84.55%
BenchmarkClearFat40-16       7.19          1.16          -83.87%
BenchmarkClearFat48-16       15.0          1.39          -90.73%
BenchmarkClearFat56-16       9.95          1.62          -83.72%
BenchmarkClearFat64-16       18.0          1.86          -89.67%
BenchmarkClearFat128-16      30.0          8.08          -73.07%
BenchmarkClearFat256-16      52.5          11.3          -78.48%
BenchmarkClearFat512-16      97.0          19.0          -80.41%
BenchmarkClearFat1024-16     244           34.2          -85.98%

Fixes: #19532

Change-Id: If493e28bc1d8e61bc79978498be9f5336a36cd3f
Reviewed-on: https://go-review.googlesource.com/38096
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
parent d972dc2d
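
For context, a minimal sketch of the kind of Go code these benchmarks represent (illustrative only: the type and benchmark names below are invented, not the runtime's ClearFat sources). Zeroing a fixed-size value compiles to an SSA Zero op of that size, and the rules and code generation changed in this commit decide whether that op becomes a short run of store-zero instructions or a counted loop.

package clearfat

import "testing"

// buf96 stands in for any 96-byte value. Zeroing it produces an SSA
// Zero [96] op, which this change lowers to a CTR loop of doubleword
// stores (plus a short straight-line tail when the size is not a
// multiple of 32).
type buf96 [96]byte

var sink buf96

func BenchmarkClear96(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = buf96{} // cleared via the Zero/LoweredZero path
	}
}

Before this change an aligned 96-byte zero was emitted as an 8-byte-per-iteration MOVDU/CMP/BLT loop, i.e. a compare and branch for every doubleword; with it, the same zero is a three-iteration bdnz loop that stores 32 bytes per pass.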
@@ -831,62 +831,135 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		ssaGenISEL(v, ppc64.C_COND_EQ, iselRegs[1], v.Reg())
 	case ssa.OpPPC64LoweredZero:
-		// Similar to how this is done on ARM,
-		// except that PPC MOVDU x,off(y) is *(y+off) = x; y=y+off
-		// not store-and-increment.
-		// Therefore R3 should be dest-align
-		// and arg1 should be dest+size-align
-		// HOWEVER, the input dest address cannot be dest-align because
-		// that does not necessarily address valid memory and it's not
-		// known how that might be optimized. Therefore, correct it in
-		// in the expansion:
-		//
-		// ADD   -8,R3,R3
-		// MOVDU R0, 8(R3)
-		// CMP   R3, Rarg1
-		// BL    -2(PC)
-		// arg1 is the address of the last element to zero
-		// auxint is alignment
-		var sz int64
-		var movu obj.As
-		switch {
-		case v.AuxInt%8 == 0:
-			sz = 8
-			movu = ppc64.AMOVDU
-		case v.AuxInt%4 == 0:
-			sz = 4
-			movu = ppc64.AMOVWZU // MOVWU instruction not implemented
-		case v.AuxInt%2 == 0:
-			sz = 2
-			movu = ppc64.AMOVHU
-		default:
-			sz = 1
-			movu = ppc64.AMOVBU
-		}
-
-		p := gc.Prog(ppc64.AADD)
-		p.Reg = v.Args[0].Reg()
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = -sz
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = v.Args[0].Reg()
-
-		p = gc.Prog(movu)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = ppc64.REG_R0
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = v.Args[0].Reg()
-		p.To.Offset = sz
-
-		p2 := gc.Prog(ppc64.ACMPU)
-		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = v.Args[0].Reg()
-		p2.To.Reg = v.Args[1].Reg()
-		p2.To.Type = obj.TYPE_REG
-
-		p3 := gc.Prog(ppc64.ABLT)
-		p3.To.Type = obj.TYPE_BRANCH
-		gc.Patch(p3, p)
+		// unaligned data doesn't hurt performance
+		// for these instructions on power8 or later
+
+		// for sizes >= 64 generate a loop as follows:
+
+		// set up loop counter in CTR, used by BC
+		//	MOVD len/32,REG_TMP
+		//	MOVD REG_TMP,CTR
+		//	loop:
+		//	MOVD R0,(R3)
+		//	MOVD R0,8(R3)
+		//	MOVD R0,16(R3)
+		//	MOVD R0,24(R3)
+		//	ADD  $32,R3
+		//	BC   16, 0, loop
+		//
+		// any remainder is done as described below
+
+		// for sizes < 64 bytes, first clear as many doublewords as possible,
+		// then handle the remainder
+		//	MOVD R0,(R3)
+		//	MOVD R0,8(R3)
+		// .... etc.
+		//
+		// the remainder bytes are cleared using one or more
+		// of the following instructions with the appropriate
+		// offsets depending which instructions are needed
+		//
+		//	MOVW R0,n1(R3)	4 bytes
+		//	MOVH R0,n2(R3)	2 bytes
+		//	MOVB R0,n3(R3)	1 byte
+		//
+		// 7 bytes: MOVW, MOVH, MOVB
+		// 6 bytes: MOVW, MOVH
+		// 5 bytes: MOVW, MOVB
+		// 3 bytes: MOVH, MOVB
+
+		// each loop iteration does 32 bytes
+		ctr := v.AuxInt / 32
+
+		// remainder bytes
+		rem := v.AuxInt % 32
+
+		// only generate a loop if there is more
+		// than 1 iteration.
+		if ctr > 1 {
+			// Set up CTR loop counter
+			p := gc.Prog(ppc64.AMOVD)
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = ctr
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = ppc64.REGTMP
+
+			p = gc.Prog(ppc64.AMOVD)
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = ppc64.REGTMP
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = ppc64.REG_CTR
+
+			// generate 4 MOVDs
+			// when this is a loop then the top must be saved
+			var top *obj.Prog
+			for offset := int64(0); offset < 32; offset += 8 {
+				// This is the top of loop
+				p := gc.Prog(ppc64.AMOVD)
+				p.From.Type = obj.TYPE_REG
+				p.From.Reg = ppc64.REG_R0
+				p.To.Type = obj.TYPE_MEM
+				p.To.Reg = v.Args[0].Reg()
+				p.To.Offset = offset
+				// Save the top of loop
+				if top == nil {
+					top = p
+				}
+			}
+
+			// Increment address for the
+			// 4 doublewords just zeroed.
+			p = gc.Prog(ppc64.AADD)
+			p.Reg = v.Args[0].Reg()
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = 32
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = v.Args[0].Reg()
+
+			// Branch back to top of loop
+			// based on CTR
+			// BC with BO_BCTR generates bdnz
+			p = gc.Prog(ppc64.ABC)
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = ppc64.BO_BCTR
+			p.Reg = ppc64.REG_R0
+			p.To.Type = obj.TYPE_BRANCH
+			gc.Patch(p, top)
+		}
+
+		// when ctr == 1 the loop was not generated but
+		// there are at least 32 bytes to clear, so add
+		// that to the remainder to generate the code
+		// to clear those doublewords
+		if ctr == 1 {
+			rem += 32
+		}
+
+		// clear the remainder starting at offset zero
+		offset := int64(0)
+
+		// first clear as many doublewords as possible
+		// then clear remaining sizes as available
+		for rem > 0 {
+			op, size := ppc64.AMOVB, int64(1)
+			switch {
+			case rem >= 8:
+				op, size = ppc64.AMOVD, 8
+			case rem >= 4:
+				op, size = ppc64.AMOVW, 4
+			case rem >= 2:
+				op, size = ppc64.AMOVH, 2
+			}
+			p := gc.Prog(op)
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = ppc64.REG_R0
+			p.To.Type = obj.TYPE_MEM
+			p.To.Reg = v.Args[0].Reg()
+			p.To.Offset = offset
+			rem -= size
+			offset += size
+		}
 	case ssa.OpPPC64LoweredMove:
 		// Similar to how this is done on ARM,
...
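
To make the new size decomposition easier to follow, here is a standalone sketch (illustrative only; zeroPlan is an invented helper, not compiler code) that mirrors the ctr/rem logic of the LoweredZero case above: the size is split into size/32 loop iterations of four doubleword stores, a lone iteration is folded into the remainder, and the remainder is cleared with the widest stores that still fit.

package main

import "fmt"

// zeroPlan reports, for a given byte count, the instruction mix the
// LoweredZero lowering above would emit (loop iterations first, then
// the trailing stores). Offsets are omitted for brevity.
func zeroPlan(size int64) []string {
	var plan []string
	ctr := size / 32 // loop iterations, 32 bytes each
	rem := size % 32 // bytes left for straight-line stores
	if ctr > 1 {
		plan = append(plan, fmt.Sprintf("loop x%d: 4 x MOVD, ADD $32, BC", ctr))
	} else if ctr == 1 {
		rem += 32 // a single iteration is emitted as straight-line stores
	}
	for rem > 0 {
		switch {
		case rem >= 8:
			plan, rem = append(plan, "MOVD"), rem-8
		case rem >= 4:
			plan, rem = append(plan, "MOVW"), rem-4
		case rem >= 2:
			plan, rem = append(plan, "MOVH"), rem-2
		default:
			plan, rem = append(plan, "MOVB"), rem-1
		}
	}
	return plan
}

func main() {
	for _, n := range []int64{7, 23, 40, 100} {
		fmt.Println(n, zeroPlan(n))
	}
}

For example, zeroPlan(100) reports a three-iteration loop followed by one MOVW, matching what the case above emits for a 100-byte LoweredZero; zeroPlan(40) reports five MOVDs, since a single 32-byte iteration is not worth a loop.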
@@ -485,60 +485,73 @@
 (Store {t} ptr val mem) && t.(Type).Size() == 2 -> (MOVHstore ptr val mem)
 (Store {t} ptr val mem) && t.(Type).Size() == 1 -> (MOVBstore ptr val mem)
+// Using Zero instead of LoweredZero allows the
+// target address to be folded where possible.
 (Zero [0] _ mem) -> mem
 (Zero [1] destptr mem) -> (MOVBstorezero destptr mem)
-(Zero [2] {t} destptr mem) && t.(Type).Alignment()%2 == 0 ->
-	(MOVHstorezero destptr mem)
-(Zero [2] destptr mem) ->
-	(MOVBstorezero [1] destptr
-		(MOVBstorezero [0] destptr mem))
-(Zero [4] {t} destptr mem) && t.(Type).Alignment()%4 == 0 ->
-	(MOVWstorezero destptr mem)
-(Zero [4] {t} destptr mem) && t.(Type).Alignment()%2 == 0 ->
-	(MOVHstorezero [2] destptr
-		(MOVHstorezero [0] destptr mem))
-(Zero [4] destptr mem) ->
-	(MOVBstorezero [3] destptr
-		(MOVBstorezero [2] destptr
-			(MOVBstorezero [1] destptr
-				(MOVBstorezero [0] destptr mem))))
-(Zero [8] {t} destptr mem) && t.(Type).Alignment()%8 == 0 ->
-	(MOVDstorezero [0] destptr mem)
-(Zero [8] {t} destptr mem) && t.(Type).Alignment()%4 == 0 ->
-	(MOVWstorezero [4] destptr
-		(MOVWstorezero [0] destptr mem))
-(Zero [8] {t} destptr mem) && t.(Type).Alignment()%2 == 0 ->
-	(MOVHstorezero [6] destptr
-		(MOVHstorezero [4] destptr
-			(MOVHstorezero [2] destptr
-				(MOVHstorezero [0] destptr mem))))
-(Zero [3] destptr mem) ->
-	(MOVBstorezero [2] destptr
-		(MOVBstorezero [1] destptr
-			(MOVBstorezero [0] destptr mem)))
+(Zero [2] destptr mem) ->
+	(MOVHstorezero destptr mem)
+(Zero [3] destptr mem) ->
+	(MOVBstorezero [2] destptr
+		(MOVHstorezero destptr mem))
+(Zero [4] destptr mem) ->
+	(MOVWstorezero destptr mem)
+(Zero [5] destptr mem) ->
+	(MOVBstorezero [4] destptr
+		(MOVWstorezero destptr mem))
+(Zero [6] destptr mem) ->
+	(MOVHstorezero [4] destptr
+		(MOVWstorezero destptr mem))
+(Zero [7] destptr mem) ->
+	(MOVBstorezero [6] destptr
+		(MOVHstorezero [4] destptr
+			(MOVWstorezero destptr mem)))
+(Zero [8] destptr mem) ->
+	(MOVDstorezero destptr mem)
 
 // Zero small numbers of words directly.
-(Zero [16] {t} destptr mem) && t.(Type).Alignment()%8 == 0 ->
-	(MOVDstorezero [8] destptr
-		(MOVDstorezero [0] destptr mem))
-(Zero [24] {t} destptr mem) && t.(Type).Alignment()%8 == 0 ->
-	(MOVDstorezero [16] destptr
-		(MOVDstorezero [8] destptr
-			(MOVDstorezero [0] destptr mem)))
-(Zero [32] {t} destptr mem) && t.(Type).Alignment()%8 == 0 ->
-	(MOVDstorezero [24] destptr
-		(MOVDstorezero [16] destptr
-			(MOVDstorezero [8] destptr
-				(MOVDstorezero [0] destptr mem))))
-
-// Large zeroing uses a loop
-(Zero [s] {t} ptr mem)
-	&& (s > 512 || config.noDuffDevice) || t.(Type).Alignment()%8 != 0 ->
-	(LoweredZero [t.(Type).Alignment()]
-		ptr
-		(ADDconst <ptr.Type> ptr [s-moveSize(t.(Type).Alignment(), config)])
-		mem)
+(Zero [12] destptr mem) ->
+	(MOVWstorezero [8] destptr
+		(MOVDstorezero [0] destptr mem))
+(Zero [16] destptr mem) ->
+	(MOVDstorezero [8] destptr
+		(MOVDstorezero [0] destptr mem))
+(Zero [24] destptr mem) ->
+	(MOVDstorezero [16] destptr
+		(MOVDstorezero [8] destptr
+			(MOVDstorezero [0] destptr mem)))
+(Zero [32] destptr mem) ->
+	(MOVDstorezero [24] destptr
+		(MOVDstorezero [16] destptr
+			(MOVDstorezero [8] destptr
+				(MOVDstorezero [0] destptr mem))))
+(Zero [40] destptr mem) ->
+	(MOVDstorezero [32] destptr
+		(MOVDstorezero [24] destptr
+			(MOVDstorezero [16] destptr
+				(MOVDstorezero [8] destptr
+					(MOVDstorezero [0] destptr mem)))))
+(Zero [48] destptr mem) ->
+	(MOVDstorezero [40] destptr
+		(MOVDstorezero [32] destptr
+			(MOVDstorezero [24] destptr
+				(MOVDstorezero [16] destptr
+					(MOVDstorezero [8] destptr
+						(MOVDstorezero [0] destptr mem))))))
+(Zero [56] destptr mem) ->
+	(MOVDstorezero [48] destptr
+		(MOVDstorezero [40] destptr
+			(MOVDstorezero [32] destptr
+				(MOVDstorezero [24] destptr
+					(MOVDstorezero [16] destptr
+						(MOVDstorezero [8] destptr
+							(MOVDstorezero [0] destptr mem)))))))
+
+// Handle cases not handled above
+(Zero [s] ptr mem) -> (LoweredZero [s] ptr mem)
 
 // moves
 (Move [0] _ _ mem) -> mem
...
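
A worked reading of these rules (the concrete sizes below are my own examples, not from the commit): sizes with a dedicated rule are expanded inline, so a 56-byte zero becomes seven MOVDstorezero instructions at offsets 48 down to 0, while sizes without one, such as 23 or 512 bytes, fall through to the final rule and become LoweredZero ops handled by the ssaGenValue code shown earlier.

package zerosizes

// Illustrative types only; the sizes are what matter here.
type t7 [7]byte     // (Zero [7]): MOVBstorezero [6] + MOVHstorezero [4] + MOVWstorezero
type t56 [56]byte   // (Zero [56]): seven MOVDstorezero, offsets 48 down to 0
type t23 [23]byte   // no size-specific rule: (LoweredZero [23]) -> 2 MOVD + MOVW + MOVH + MOVB
type t512 [512]byte // no size-specific rule: (LoweredZero [512]) -> a 16-iteration CTR loop

var a t7
var b t56
var c t23
var d t512

// Zeroing any of these values produces the Zero op whose rewrite is
// listed in the rules above.
func clearAll() {
	a, b, c, d = t7{}, t56{}, t23{}, t512{}
}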
@@ -312,19 +312,37 @@ func init() {
 		// large or unaligned zeroing
 		// arg0 = address of memory to zero (in R3, changed as side effect)
-		// arg1 = address of the last element to zero
-		// arg2 = mem
 		// returns mem
-		//	ADD -8,R3,R3	// intermediate value not valid GC ptr, cannot expose to opt+GC
-		//	MOVDU	R0, 8(R3)
-		//	CMP	R3, Rarg1
-		//	BLE	-2(PC)
+		//
+		// a loop is generated when there is more than one iteration
+		// needed to clear 4 doublewords
+		//
+		//	MOVD	$len/32,R31
+		//	MOVD	R31,CTR
+		//	loop:
+		//	MOVD	R0,(R3)
+		//	MOVD	R0,8(R3)
+		//	MOVD	R0,16(R3)
+		//	MOVD	R0,24(R3)
+		//	ADD	R3,32
+		//	BC	loop
+		// remaining doubleword clears generated as needed
+		//	MOVD	R0,(R3)
+		//	MOVD	R0,8(R3)
+		//	MOVD	R0,16(R3)
+		//	MOVD	R0,24(R3)
+		// one or more of these to clear remainder < 8 bytes
+		//	MOVW	R0,n1(R3)
+		//	MOVH	R0,n2(R3)
+		//	MOVB	R0,n3(R3)
 		{
 			name:      "LoweredZero",
 			aux:       "Int64",
-			argLength: 3,
+			argLength: 2,
 			reg: regInfo{
-				inputs:   []regMask{buildReg("R3"), gp},
+				inputs:   []regMask{buildReg("R3")},
 				clobbers: buildReg("R3"),
 			},
 			clobberFlags: true,
...
@@ -17368,13 +17368,12 @@ var opcodeTable = [...]opInfo{
 	{
 		name:           "LoweredZero",
 		auxType:        auxInt64,
-		argLen:         3,
+		argLen:         2,
 		clobberFlags:   true,
 		faultOnNilArg0: true,
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 8}, // R3
-				{1, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
 			},
 			clobbers: 8, // R3
 		},
...
@@ -9656,8 +9656,6 @@ func rewriteValuePPC64_OpXor8(v *Value) bool {
 func rewriteValuePPC64_OpZero(v *Value) bool {
 	b := v.Block
 	_ = b
-	config := b.Func.Config
-	_ = config
 	// match: (Zero [0] _ mem)
 	// cond:
 	// result: mem
@@ -9685,200 +9683,178 @@ func rewriteValuePPC64_OpZero(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
-	// match: (Zero [2] {t} destptr mem)
-	// cond: t.(Type).Alignment()%2 == 0
-	// result: (MOVHstorezero destptr mem)
-	for {
-		if v.AuxInt != 2 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%2 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVHstorezero)
-		v.AddArg(destptr)
-		v.AddArg(mem)
-		return true
-	}
-	// match: (Zero [2] destptr mem)
-	// cond:
-	// result: (MOVBstorezero [1] destptr (MOVBstorezero [0] destptr mem))
-	for {
-		if v.AuxInt != 2 {
-			break
-		}
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		v.reset(OpPPC64MOVBstorezero)
-		v.AuxInt = 1
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
-		v0.AuxInt = 0
-		v0.AddArg(destptr)
-		v0.AddArg(mem)
-		v.AddArg(v0)
-		return true
-	}
-	// match: (Zero [4] {t} destptr mem)
-	// cond: t.(Type).Alignment()%4 == 0
-	// result: (MOVWstorezero destptr mem)
-	for {
-		if v.AuxInt != 4 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%4 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVWstorezero)
-		v.AddArg(destptr)
-		v.AddArg(mem)
-		return true
-	}
-	// match: (Zero [4] {t} destptr mem)
-	// cond: t.(Type).Alignment()%2 == 0
-	// result: (MOVHstorezero [2] destptr (MOVHstorezero [0] destptr mem))
-	for {
-		if v.AuxInt != 4 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%2 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVHstorezero)
-		v.AuxInt = 2
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
-		v0.AuxInt = 0
-		v0.AddArg(destptr)
-		v0.AddArg(mem)
-		v.AddArg(v0)
-		return true
-	}
-	// match: (Zero [4] destptr mem)
-	// cond:
-	// result: (MOVBstorezero [3] destptr (MOVBstorezero [2] destptr (MOVBstorezero [1] destptr (MOVBstorezero [0] destptr mem))))
-	for {
-		if v.AuxInt != 4 {
-			break
-		}
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		v.reset(OpPPC64MOVBstorezero)
-		v.AuxInt = 3
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
-		v0.AuxInt = 2
-		v0.AddArg(destptr)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
-		v1.AuxInt = 1
-		v1.AddArg(destptr)
-		v2 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
-		v2.AuxInt = 0
-		v2.AddArg(destptr)
-		v2.AddArg(mem)
-		v1.AddArg(v2)
-		v0.AddArg(v1)
-		v.AddArg(v0)
-		return true
-	}
-	// match: (Zero [8] {t} destptr mem)
-	// cond: t.(Type).Alignment()%8 == 0
-	// result: (MOVDstorezero [0] destptr mem)
-	for {
-		if v.AuxInt != 8 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%8 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVDstorezero)
-		v.AuxInt = 0
-		v.AddArg(destptr)
-		v.AddArg(mem)
-		return true
-	}
-	// match: (Zero [8] {t} destptr mem)
-	// cond: t.(Type).Alignment()%4 == 0
-	// result: (MOVWstorezero [4] destptr (MOVWstorezero [0] destptr mem))
-	for {
-		if v.AuxInt != 8 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%4 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVWstorezero)
-		v.AuxInt = 4
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, TypeMem)
-		v0.AuxInt = 0
-		v0.AddArg(destptr)
-		v0.AddArg(mem)
-		v.AddArg(v0)
-		return true
-	}
-	// match: (Zero [8] {t} destptr mem)
-	// cond: t.(Type).Alignment()%2 == 0
-	// result: (MOVHstorezero [6] destptr (MOVHstorezero [4] destptr (MOVHstorezero [2] destptr (MOVHstorezero [0] destptr mem))))
-	for {
-		if v.AuxInt != 8 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%2 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVHstorezero)
-		v.AuxInt = 6
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
-		v0.AuxInt = 4
-		v0.AddArg(destptr)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
-		v1.AuxInt = 2
-		v1.AddArg(destptr)
-		v2 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
-		v2.AuxInt = 0
-		v2.AddArg(destptr)
-		v2.AddArg(mem)
-		v1.AddArg(v2)
-		v0.AddArg(v1)
-		v.AddArg(v0)
-		return true
-	}
-	// match: (Zero [3] destptr mem)
-	// cond:
-	// result: (MOVBstorezero [2] destptr (MOVBstorezero [1] destptr (MOVBstorezero [0] destptr mem)))
-	for {
-		if v.AuxInt != 3 {
-			break
-		}
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		v.reset(OpPPC64MOVBstorezero)
-		v.AuxInt = 2
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
-		v0.AuxInt = 1
-		v0.AddArg(destptr)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVBstorezero, TypeMem)
+	// match: (Zero [2] destptr mem)
+	// cond:
+	// result: (MOVHstorezero destptr mem)
+	for {
+		if v.AuxInt != 2 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVHstorezero)
+		v.AddArg(destptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Zero [3] destptr mem)
+	// cond:
+	// result: (MOVBstorezero [2] destptr (MOVHstorezero destptr mem))
+	for {
+		if v.AuxInt != 3 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVBstorezero)
+		v.AuxInt = 2
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
+		v0.AddArg(destptr)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [4] destptr mem)
+	// cond:
+	// result: (MOVWstorezero destptr mem)
+	for {
+		if v.AuxInt != 4 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVWstorezero)
+		v.AddArg(destptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Zero [5] destptr mem)
+	// cond:
+	// result: (MOVBstorezero [4] destptr (MOVWstorezero destptr mem))
+	for {
+		if v.AuxInt != 5 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVBstorezero)
+		v.AuxInt = 4
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, TypeMem)
+		v0.AddArg(destptr)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [6] destptr mem)
+	// cond:
+	// result: (MOVHstorezero [4] destptr (MOVWstorezero destptr mem))
+	for {
+		if v.AuxInt != 6 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVHstorezero)
+		v.AuxInt = 4
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, TypeMem)
+		v0.AddArg(destptr)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [7] destptr mem)
+	// cond:
+	// result: (MOVBstorezero [6] destptr (MOVHstorezero [4] destptr (MOVWstorezero destptr mem)))
+	for {
+		if v.AuxInt != 7 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVBstorezero)
+		v.AuxInt = 6
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVHstorezero, TypeMem)
+		v0.AuxInt = 4
+		v0.AddArg(destptr)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, TypeMem)
+		v1.AddArg(destptr)
+		v1.AddArg(mem)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [8] destptr mem)
+	// cond:
+	// result: (MOVDstorezero destptr mem)
+	for {
+		if v.AuxInt != 8 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVDstorezero)
+		v.AddArg(destptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Zero [12] destptr mem)
+	// cond:
+	// result: (MOVWstorezero [8] destptr (MOVDstorezero [0] destptr mem))
+	for {
+		if v.AuxInt != 12 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVWstorezero)
+		v.AuxInt = 8
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v0.AuxInt = 0
+		v0.AddArg(destptr)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [16] destptr mem)
+	// cond:
+	// result: (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))
+	for {
+		if v.AuxInt != 16 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVDstorezero)
+		v.AuxInt = 8
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v0.AuxInt = 0
+		v0.AddArg(destptr)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [24] destptr mem)
+	// cond:
+	// result: (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))
+	for {
+		if v.AuxInt != 24 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVDstorezero)
+		v.AuxInt = 16
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v0.AuxInt = 8
+		v0.AddArg(destptr)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
 		v1.AuxInt = 0
 		v1.AddArg(destptr)
 		v1.AddArg(mem)
@@ -9886,109 +9862,151 @@ func rewriteValuePPC64_OpZero(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
-	// match: (Zero [16] {t} destptr mem)
-	// cond: t.(Type).Alignment()%8 == 0
-	// result: (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))
-	for {
-		if v.AuxInt != 16 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%8 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVDstorezero)
-		v.AuxInt = 8
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
-		v0.AuxInt = 0
-		v0.AddArg(destptr)
-		v0.AddArg(mem)
-		v.AddArg(v0)
-		return true
-	}
-	// match: (Zero [24] {t} destptr mem)
-	// cond: t.(Type).Alignment()%8 == 0
-	// result: (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))
-	for {
-		if v.AuxInt != 24 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%8 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVDstorezero)
-		v.AuxInt = 16
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
-		v0.AuxInt = 8
-		v0.AddArg(destptr)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
-		v1.AuxInt = 0
-		v1.AddArg(destptr)
-		v1.AddArg(mem)
-		v0.AddArg(v1)
-		v.AddArg(v0)
-		return true
-	}
-	// match: (Zero [32] {t} destptr mem)
-	// cond: t.(Type).Alignment()%8 == 0
-	// result: (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))
-	for {
-		if v.AuxInt != 32 {
-			break
-		}
-		t := v.Aux
-		destptr := v.Args[0]
-		mem := v.Args[1]
-		if !(t.(Type).Alignment()%8 == 0) {
-			break
-		}
-		v.reset(OpPPC64MOVDstorezero)
-		v.AuxInt = 24
-		v.AddArg(destptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
-		v0.AuxInt = 16
-		v0.AddArg(destptr)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
-		v1.AuxInt = 8
-		v1.AddArg(destptr)
-		v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
-		v2.AuxInt = 0
-		v2.AddArg(destptr)
-		v2.AddArg(mem)
-		v1.AddArg(v2)
-		v0.AddArg(v1)
-		v.AddArg(v0)
-		return true
-	}
-	// match: (Zero [s] {t} ptr mem)
-	// cond: (s > 512 || config.noDuffDevice) || t.(Type).Alignment()%8 != 0
-	// result: (LoweredZero [t.(Type).Alignment()] ptr (ADDconst <ptr.Type> ptr [s-moveSize(t.(Type).Alignment(), config)]) mem)
-	for {
-		s := v.AuxInt
-		t := v.Aux
-		ptr := v.Args[0]
-		mem := v.Args[1]
-		if !((s > 512 || config.noDuffDevice) || t.(Type).Alignment()%8 != 0) {
-			break
-		}
-		v.reset(OpPPC64LoweredZero)
-		v.AuxInt = t.(Type).Alignment()
-		v.AddArg(ptr)
-		v0 := b.NewValue0(v.Pos, OpPPC64ADDconst, ptr.Type)
-		v0.AuxInt = s - moveSize(t.(Type).Alignment(), config)
-		v0.AddArg(ptr)
-		v.AddArg(v0)
-		v.AddArg(mem)
-		return true
-	}
-	return false
+	// match: (Zero [32] destptr mem)
+	// cond:
+	// result: (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))
+	for {
+		if v.AuxInt != 32 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVDstorezero)
+		v.AuxInt = 24
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v0.AuxInt = 16
+		v0.AddArg(destptr)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v1.AuxInt = 8
+		v1.AddArg(destptr)
+		v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v2.AuxInt = 0
+		v2.AddArg(destptr)
+		v2.AddArg(mem)
+		v1.AddArg(v2)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [40] destptr mem)
+	// cond:
+	// result: (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))))
+	for {
+		if v.AuxInt != 40 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVDstorezero)
+		v.AuxInt = 32
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v0.AuxInt = 24
+		v0.AddArg(destptr)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v1.AuxInt = 16
+		v1.AddArg(destptr)
+		v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v2.AuxInt = 8
+		v2.AddArg(destptr)
+		v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v3.AuxInt = 0
+		v3.AddArg(destptr)
+		v3.AddArg(mem)
+		v2.AddArg(v3)
+		v1.AddArg(v2)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [48] destptr mem)
+	// cond:
+	// result: (MOVDstorezero [40] destptr (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))))
+	for {
+		if v.AuxInt != 48 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVDstorezero)
+		v.AuxInt = 40
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v0.AuxInt = 32
+		v0.AddArg(destptr)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v1.AuxInt = 24
+		v1.AddArg(destptr)
+		v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v2.AuxInt = 16
+		v2.AddArg(destptr)
+		v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v3.AuxInt = 8
+		v3.AddArg(destptr)
+		v4 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v4.AuxInt = 0
+		v4.AddArg(destptr)
+		v4.AddArg(mem)
+		v3.AddArg(v4)
+		v2.AddArg(v3)
+		v1.AddArg(v2)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [56] destptr mem)
+	// cond:
+	// result: (MOVDstorezero [48] destptr (MOVDstorezero [40] destptr (MOVDstorezero [32] destptr (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))))))
+	for {
+		if v.AuxInt != 56 {
+			break
+		}
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64MOVDstorezero)
+		v.AuxInt = 48
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v0.AuxInt = 40
+		v0.AddArg(destptr)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v1.AuxInt = 32
+		v1.AddArg(destptr)
+		v2 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v2.AuxInt = 24
+		v2.AddArg(destptr)
+		v3 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v3.AuxInt = 16
+		v3.AddArg(destptr)
+		v4 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v4.AuxInt = 8
+		v4.AddArg(destptr)
+		v5 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, TypeMem)
+		v5.AuxInt = 0
+		v5.AddArg(destptr)
+		v5.AddArg(mem)
+		v4.AddArg(v5)
+		v3.AddArg(v4)
+		v2.AddArg(v3)
+		v1.AddArg(v2)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [s] ptr mem)
+	// cond:
+	// result: (LoweredZero [s] ptr mem)
+	for {
+		s := v.AuxInt
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpPPC64LoweredZero)
+		v.AuxInt = s
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
 }
 func rewriteValuePPC64_OpZeroExt16to32(v *Value) bool {
 	// match: (ZeroExt16to32 x)
...