Commit 40e25895 authored by Lynn Boger's avatar Lynn Boger

cmd/compile,math: improve int<->float conversions on ppc64x

The functions Float64bits and Float64frombits perform
poorly on ppc64x because the int<->float conversions
often result in load and store sequences to handle the
type change. This patch adds more rules to recognize
those sequences and use register to register moves
and avoid unnecessary loads and stores where possible.

There were some existing rules to improve these conversions,
but this provides additional improvements. Included here:

- New instruction FCFIDS to improve on conversion to 32 bit
- Rename Xf2i64 and Xi2f64 as MTVSRD, MFVSRD, to match the asm
- Add rules to lower some of the load/store sequences for
- Added new go asm to ppc64.s testcase.
conversions

Improvements:

BenchmarkAbs-16                2.16          0.93          -56.94%
BenchmarkCopysign-16           2.66          1.18          -55.64%
BenchmarkRound-16              4.82          2.69          -44.19%
BenchmarkSignbit-16            1.71          1.14          -33.33%
BenchmarkFrexp-16              11.4          7.94          -30.35%
BenchmarkLogb-16               10.4          7.34          -29.42%
BenchmarkLdexp-16              15.7          11.2          -28.66%
BenchmarkIlogb-16              10.2          7.32          -28.24%
BenchmarkPowInt-16             69.6          55.9          -19.68%
BenchmarkModf-16               10.1          8.19          -18.91%
BenchmarkLog2-16               17.4          14.3          -17.82%
BenchmarkCbrt-16               45.0          37.3          -17.11%
BenchmarkAtanh-16              57.6          48.3          -16.15%
BenchmarkRemainder-16          76.6          65.4          -14.62%
BenchmarkGamma-16              26.0          22.5          -13.46%
BenchmarkPowFrac-16            197           174           -11.68%
BenchmarkMod-16                112           99.8          -10.89%
BenchmarkAsinh-16              59.9          53.7          -10.35%
BenchmarkAcosh-16              44.8          40.3          -10.04%

Updates #21390

Change-Id: I56cc991fc2e55249d69518d4e1ba76cc23904e35
Reviewed-on: https://go-review.googlesource.com/63290Reviewed-by: default avatarMichael Munday <mike.munday@ibm.com>
parent f351dbfa
...@@ -550,6 +550,14 @@ label1: ...@@ -550,6 +550,14 @@ label1:
// ftsqrt BF, FRB // ftsqrt BF, FRB
FTSQRT F2,$7 FTSQRT F2,$7
// FCFID
// FCFIDS
FCFID F2,F3
FCFIDCC F3,F3
FCFIDS F2,F3
FCFIDSCC F2,F3
// //
// CMP // CMP
// //
......
...@@ -152,29 +152,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -152,29 +152,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = y p.To.Reg = y
} }
case ssa.OpPPC64Xf2i64:
{
x := v.Args[0].Reg()
y := v.Reg()
p := s.Prog(ppc64.AMFVSRD)
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = y
}
case ssa.OpPPC64Xi2f64:
{
x := v.Args[0].Reg()
y := v.Reg()
p := s.Prog(ppc64.AMTVSRD)
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = y
}
case ssa.OpPPC64LoweredAtomicAnd8, case ssa.OpPPC64LoweredAtomicAnd8,
ssa.OpPPC64LoweredAtomicOr8: ssa.OpPPC64LoweredAtomicOr8:
// SYNC // SYNC
...@@ -597,7 +574,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -597,7 +574,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect. p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB: case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD:
r := v.Reg() r := v.Reg()
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
......
...@@ -57,19 +57,25 @@ ...@@ -57,19 +57,25 @@
(Div64F x y) -> (FDIV x y) (Div64F x y) -> (FDIV x y)
// Lowering float <-> int // Lowering float <-> int
(Cvt32to32F x) -> (FRSP (FCFID (Xi2f64 (SignExt32to64 x)))) (Cvt32to32F x) -> (FCFIDS (MTVSRD (SignExt32to64 x)))
(Cvt32to64F x) -> (FCFID (Xi2f64 (SignExt32to64 x))) (Cvt32to64F x) -> (FCFID (MTVSRD (SignExt32to64 x)))
(Cvt64to32F x) -> (FRSP (FCFID (Xi2f64 x))) (Cvt64to32F x) -> (FCFIDS (MTVSRD x))
(Cvt64to64F x) -> (FCFID (Xi2f64 x)) (Cvt64to64F x) -> (FCFID (MTVSRD x))
(Cvt32Fto32 x) -> (Xf2i64 (FCTIWZ x)) (Cvt32Fto32 x) -> (MFVSRD (FCTIWZ x))
(Cvt32Fto64 x) -> (Xf2i64 (FCTIDZ x)) (Cvt32Fto64 x) -> (MFVSRD (FCTIDZ x))
(Cvt64Fto32 x) -> (Xf2i64 (FCTIWZ x)) (Cvt64Fto32 x) -> (MFVSRD (FCTIWZ x))
(Cvt64Fto64 x) -> (Xf2i64 (FCTIDZ x)) (Cvt64Fto64 x) -> (MFVSRD (FCTIDZ x))
(Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64 (Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64
(Cvt64Fto32F x) -> (FRSP x) (Cvt64Fto32F x) -> (FRSP x)
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) -> (MFVSRD x)
(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _)) -> (MTVSRD x)
(FMOVDstore [off] {sym} ptr (MTVSRD x) mem) -> (MOVDstore [off] {sym} ptr x mem)
(MOVDstore [off] {sym} ptr (MFVSRD x) mem) -> (FMOVDstore [off] {sym} ptr x mem)
(Round32F x) -> (LoweredRound32F x) (Round32F x) -> (LoweredRound32F x)
(Round64F x) -> (LoweredRound64F x) (Round64F x) -> (LoweredRound64F x)
......
...@@ -223,6 +223,7 @@ func init() { ...@@ -223,6 +223,7 @@ func init() {
{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero {name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero {name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"}, // convert 64-bit integer to float {name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"}, // convert 64-bit integer to float
{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"}, // round float to 32-bit value {name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"}, // round float to 32-bit value
// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC. // Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
...@@ -231,8 +232,8 @@ func init() { ...@@ -231,8 +232,8 @@ func init() {
// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use // There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
// the word-load instructions. (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr) // the word-load instructions. (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
{name: "Xf2i64", argLength: 1, reg: fpgp, typ: "Int64"}, // move 64 bits of F register into G register {name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"}, // move 64 bits of F register into G register
{name: "Xi2f64", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits of G register into F register {name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1 {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1 {name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1
......
...@@ -1333,9 +1333,10 @@ const ( ...@@ -1333,9 +1333,10 @@ const (
OpPPC64FCTIDZ OpPPC64FCTIDZ
OpPPC64FCTIWZ OpPPC64FCTIWZ
OpPPC64FCFID OpPPC64FCFID
OpPPC64FCFIDS
OpPPC64FRSP OpPPC64FRSP
OpPPC64Xf2i64 OpPPC64MFVSRD
OpPPC64Xi2f64 OpPPC64MTVSRD
OpPPC64AND OpPPC64AND
OpPPC64ANDN OpPPC64ANDN
OpPPC64OR OpPPC64OR
...@@ -17086,6 +17087,19 @@ var opcodeTable = [...]opInfo{ ...@@ -17086,6 +17087,19 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "FCFIDS",
argLen: 1,
asm: ppc64.AFCFIDS,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{ {
name: "FRSP", name: "FRSP",
argLen: 1, argLen: 1,
...@@ -17100,8 +17114,9 @@ var opcodeTable = [...]opInfo{ ...@@ -17100,8 +17114,9 @@ var opcodeTable = [...]opInfo{
}, },
}, },
{ {
name: "Xf2i64", name: "MFVSRD",
argLen: 1, argLen: 1,
asm: ppc64.AMFVSRD,
reg: regInfo{ reg: regInfo{
inputs: []inputInfo{ inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 {0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
...@@ -17112,8 +17127,9 @@ var opcodeTable = [...]opInfo{ ...@@ -17112,8 +17127,9 @@ var opcodeTable = [...]opInfo{
}, },
}, },
{ {
name: "Xi2f64", name: "MTVSRD",
argLen: 1, argLen: 1,
asm: ppc64.AMTVSRD,
reg: regInfo{ reg: regInfo{
inputs: []inputInfo{ inputs: []inputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
......
...@@ -1307,10 +1307,10 @@ func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool { ...@@ -1307,10 +1307,10 @@ func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
_ = typ _ = typ
// match: (Cvt32Fto32 x) // match: (Cvt32Fto32 x)
// cond: // cond:
// result: (Xf2i64 (FCTIWZ x)) // result: (MFVSRD (FCTIWZ x))
for { for {
x := v.Args[0] x := v.Args[0]
v.reset(OpPPC64Xf2i64) v.reset(OpPPC64MFVSRD)
v0 := b.NewValue0(v.Pos, OpPPC64FCTIWZ, typ.Float64) v0 := b.NewValue0(v.Pos, OpPPC64FCTIWZ, typ.Float64)
v0.AddArg(x) v0.AddArg(x)
v.AddArg(v0) v.AddArg(v0)
...@@ -1324,10 +1324,10 @@ func rewriteValuePPC64_OpCvt32Fto64_0(v *Value) bool { ...@@ -1324,10 +1324,10 @@ func rewriteValuePPC64_OpCvt32Fto64_0(v *Value) bool {
_ = typ _ = typ
// match: (Cvt32Fto64 x) // match: (Cvt32Fto64 x)
// cond: // cond:
// result: (Xf2i64 (FCTIDZ x)) // result: (MFVSRD (FCTIDZ x))
for { for {
x := v.Args[0] x := v.Args[0]
v.reset(OpPPC64Xf2i64) v.reset(OpPPC64MFVSRD)
v0 := b.NewValue0(v.Pos, OpPPC64FCTIDZ, typ.Float64) v0 := b.NewValue0(v.Pos, OpPPC64FCTIDZ, typ.Float64)
v0.AddArg(x) v0.AddArg(x)
v.AddArg(v0) v.AddArg(v0)
...@@ -1353,15 +1353,13 @@ func rewriteValuePPC64_OpCvt32to32F_0(v *Value) bool { ...@@ -1353,15 +1353,13 @@ func rewriteValuePPC64_OpCvt32to32F_0(v *Value) bool {
_ = typ _ = typ
// match: (Cvt32to32F x) // match: (Cvt32to32F x)
// cond: // cond:
// result: (FRSP (FCFID (Xi2f64 (SignExt32to64 x)))) // result: (FCFIDS (MTVSRD (SignExt32to64 x)))
for { for {
x := v.Args[0] x := v.Args[0]
v.reset(OpPPC64FRSP) v.reset(OpPPC64FCFIDS)
v0 := b.NewValue0(v.Pos, OpPPC64FCFID, typ.Float64) v0 := b.NewValue0(v.Pos, OpPPC64MTVSRD, typ.Float64)
v1 := b.NewValue0(v.Pos, OpPPC64Xi2f64, typ.Float64) v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) v1.AddArg(x)
v2.AddArg(x)
v1.AddArg(v2)
v0.AddArg(v1) v0.AddArg(v1)
v.AddArg(v0) v.AddArg(v0)
return true return true
...@@ -1374,11 +1372,11 @@ func rewriteValuePPC64_OpCvt32to64F_0(v *Value) bool { ...@@ -1374,11 +1372,11 @@ func rewriteValuePPC64_OpCvt32to64F_0(v *Value) bool {
_ = typ _ = typ
// match: (Cvt32to64F x) // match: (Cvt32to64F x)
// cond: // cond:
// result: (FCFID (Xi2f64 (SignExt32to64 x))) // result: (FCFID (MTVSRD (SignExt32to64 x)))
for { for {
x := v.Args[0] x := v.Args[0]
v.reset(OpPPC64FCFID) v.reset(OpPPC64FCFID)
v0 := b.NewValue0(v.Pos, OpPPC64Xi2f64, typ.Float64) v0 := b.NewValue0(v.Pos, OpPPC64MTVSRD, typ.Float64)
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v1.AddArg(x) v1.AddArg(x)
v0.AddArg(v1) v0.AddArg(v1)
...@@ -1393,10 +1391,10 @@ func rewriteValuePPC64_OpCvt64Fto32_0(v *Value) bool { ...@@ -1393,10 +1391,10 @@ func rewriteValuePPC64_OpCvt64Fto32_0(v *Value) bool {
_ = typ _ = typ
// match: (Cvt64Fto32 x) // match: (Cvt64Fto32 x)
// cond: // cond:
// result: (Xf2i64 (FCTIWZ x)) // result: (MFVSRD (FCTIWZ x))
for { for {
x := v.Args[0] x := v.Args[0]
v.reset(OpPPC64Xf2i64) v.reset(OpPPC64MFVSRD)
v0 := b.NewValue0(v.Pos, OpPPC64FCTIWZ, typ.Float64) v0 := b.NewValue0(v.Pos, OpPPC64FCTIWZ, typ.Float64)
v0.AddArg(x) v0.AddArg(x)
v.AddArg(v0) v.AddArg(v0)
...@@ -1421,10 +1419,10 @@ func rewriteValuePPC64_OpCvt64Fto64_0(v *Value) bool { ...@@ -1421,10 +1419,10 @@ func rewriteValuePPC64_OpCvt64Fto64_0(v *Value) bool {
_ = typ _ = typ
// match: (Cvt64Fto64 x) // match: (Cvt64Fto64 x)
// cond: // cond:
// result: (Xf2i64 (FCTIDZ x)) // result: (MFVSRD (FCTIDZ x))
for { for {
x := v.Args[0] x := v.Args[0]
v.reset(OpPPC64Xf2i64) v.reset(OpPPC64MFVSRD)
v0 := b.NewValue0(v.Pos, OpPPC64FCTIDZ, typ.Float64) v0 := b.NewValue0(v.Pos, OpPPC64FCTIDZ, typ.Float64)
v0.AddArg(x) v0.AddArg(x)
v.AddArg(v0) v.AddArg(v0)
...@@ -1438,14 +1436,12 @@ func rewriteValuePPC64_OpCvt64to32F_0(v *Value) bool { ...@@ -1438,14 +1436,12 @@ func rewriteValuePPC64_OpCvt64to32F_0(v *Value) bool {
_ = typ _ = typ
// match: (Cvt64to32F x) // match: (Cvt64to32F x)
// cond: // cond:
// result: (FRSP (FCFID (Xi2f64 x))) // result: (FCFIDS (MTVSRD x))
for { for {
x := v.Args[0] x := v.Args[0]
v.reset(OpPPC64FRSP) v.reset(OpPPC64FCFIDS)
v0 := b.NewValue0(v.Pos, OpPPC64FCFID, typ.Float64) v0 := b.NewValue0(v.Pos, OpPPC64MTVSRD, typ.Float64)
v1 := b.NewValue0(v.Pos, OpPPC64Xi2f64, typ.Float64) v0.AddArg(x)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0) v.AddArg(v0)
return true return true
} }
...@@ -1457,11 +1453,11 @@ func rewriteValuePPC64_OpCvt64to64F_0(v *Value) bool { ...@@ -1457,11 +1453,11 @@ func rewriteValuePPC64_OpCvt64to64F_0(v *Value) bool {
_ = typ _ = typ
// match: (Cvt64to64F x) // match: (Cvt64to64F x)
// cond: // cond:
// result: (FCFID (Xi2f64 x)) // result: (FCFID (MTVSRD x))
for { for {
x := v.Args[0] x := v.Args[0]
v.reset(OpPPC64FCFID) v.reset(OpPPC64FCFID)
v0 := b.NewValue0(v.Pos, OpPPC64Xi2f64, typ.Float64) v0 := b.NewValue0(v.Pos, OpPPC64MTVSRD, typ.Float64)
v0.AddArg(x) v0.AddArg(x)
v.AddArg(v0) v.AddArg(v0)
return true return true
...@@ -6242,6 +6238,33 @@ func rewriteValuePPC64_OpPPC64FADDS_0(v *Value) bool { ...@@ -6242,6 +6238,33 @@ func rewriteValuePPC64_OpPPC64FADDS_0(v *Value) bool {
return false return false
} }
func rewriteValuePPC64_OpPPC64FMOVDload_0(v *Value) bool { func rewriteValuePPC64_OpPPC64FMOVDload_0(v *Value) bool {
// match: (FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _))
// cond:
// result: (MTVSRD x)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[1]
ptr := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64MOVDstore {
break
}
if v_1.AuxInt != off {
break
}
if v_1.Aux != sym {
break
}
_ = v_1.Args[2]
if ptr != v_1.Args[0] {
break
}
x := v_1.Args[1]
v.reset(OpPPC64MTVSRD)
v.AddArg(x)
return true
}
// match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) // cond: canMergeSym(sym1,sym2)
// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) // result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
...@@ -6294,6 +6317,28 @@ func rewriteValuePPC64_OpPPC64FMOVDload_0(v *Value) bool { ...@@ -6294,6 +6317,28 @@ func rewriteValuePPC64_OpPPC64FMOVDload_0(v *Value) bool {
return false return false
} }
func rewriteValuePPC64_OpPPC64FMOVDstore_0(v *Value) bool { func rewriteValuePPC64_OpPPC64FMOVDstore_0(v *Value) bool {
// match: (FMOVDstore [off] {sym} ptr (MTVSRD x) mem)
// cond:
// result: (MOVDstore [off] {sym} ptr x mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64MTVSRD {
break
}
x := v_1.Args[0]
mem := v.Args[2]
v.reset(OpPPC64MOVDstore)
v.AuxInt = off
v.Aux = sym
v.AddArg(ptr)
v.AddArg(x)
v.AddArg(mem)
return true
}
// match: (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) // match: (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
// cond: is16Bit(off1+off2) // cond: is16Bit(off1+off2)
// result: (FMOVDstore [off1+off2] {sym} ptr val mem) // result: (FMOVDstore [off1+off2] {sym} ptr val mem)
...@@ -7070,6 +7115,33 @@ func rewriteValuePPC64_OpPPC64MOVBstorezero_0(v *Value) bool { ...@@ -7070,6 +7115,33 @@ func rewriteValuePPC64_OpPPC64MOVBstorezero_0(v *Value) bool {
return false return false
} }
func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool { func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
// match: (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _))
// cond:
// result: (MFVSRD x)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[1]
ptr := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64FMOVDstore {
break
}
if v_1.AuxInt != off {
break
}
if v_1.Aux != sym {
break
}
_ = v_1.Args[2]
if ptr != v_1.Args[0] {
break
}
x := v_1.Args[1]
v.reset(OpPPC64MFVSRD)
v.AddArg(x)
return true
}
// match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) // cond: canMergeSym(sym1,sym2)
// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
...@@ -7122,6 +7194,28 @@ func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool { ...@@ -7122,6 +7194,28 @@ func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
return false return false
} }
func rewriteValuePPC64_OpPPC64MOVDstore_0(v *Value) bool { func rewriteValuePPC64_OpPPC64MOVDstore_0(v *Value) bool {
// match: (MOVDstore [off] {sym} ptr (MFVSRD x) mem)
// cond:
// result: (FMOVDstore [off] {sym} ptr x mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64MFVSRD {
break
}
x := v_1.Args[0]
mem := v.Args[2]
v.reset(OpPPC64FMOVDstore)
v.AuxInt = off
v.Aux = sym
v.AddArg(ptr)
v.AddArg(x)
v.AddArg(mem)
return true
}
// match: (MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) // match: (MOVDstore [off1] {sym} (ADDconst [off2] x) val mem)
// cond: is16Bit(off1+off2) // cond: is16Bit(off1+off2)
// result: (MOVDstore [off1+off2] {sym} x val mem) // result: (MOVDstore [off1+off2] {sym} x val mem)
......
...@@ -650,6 +650,8 @@ const ( ...@@ -650,6 +650,8 @@ const (
AFCFIDCC AFCFIDCC
AFCFIDU AFCFIDU
AFCFIDUCC AFCFIDUCC
AFCFIDS
AFCFIDSCC
AFCTID AFCTID
AFCTIDCC AFCTIDCC
AFCTIDZ AFCTIDZ
......
...@@ -271,6 +271,8 @@ var Anames = []string{ ...@@ -271,6 +271,8 @@ var Anames = []string{
"FCFIDCC", "FCFIDCC",
"FCFIDU", "FCFIDU",
"FCFIDUCC", "FCFIDUCC",
"FCFIDS",
"FCFIDSCC",
"FCTID", "FCTID",
"FCTIDCC", "FCTIDCC",
"FCTIDZ", "FCTIDZ",
......
...@@ -1596,6 +1596,8 @@ func buildop(ctxt *obj.Link) { ...@@ -1596,6 +1596,8 @@ func buildop(ctxt *obj.Link) {
opset(AFCFIDCC, r0) opset(AFCFIDCC, r0)
opset(AFCFIDU, r0) opset(AFCFIDU, r0)
opset(AFCFIDUCC, r0) opset(AFCFIDUCC, r0)
opset(AFCFIDS, r0)
opset(AFCFIDSCC, r0)
opset(AFRES, r0) opset(AFRES, r0)
opset(AFRESCC, r0) opset(AFRESCC, r0)
opset(AFRIM, r0) opset(AFRIM, r0)
...@@ -3656,6 +3658,10 @@ func (c *ctxt9) oprrr(a obj.As) uint32 { ...@@ -3656,6 +3658,10 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
return OPVCC(63, 974, 0, 0) return OPVCC(63, 974, 0, 0)
case AFCFIDUCC: case AFCFIDUCC:
return OPVCC(63, 974, 0, 1) return OPVCC(63, 974, 0, 1)
case AFCFIDS:
return OPVCC(59, 846, 0, 0)
case AFCFIDSCC:
return OPVCC(59, 846, 0, 1)
case AFCTIW: case AFCTIW:
return OPVCC(63, 14, 0, 0) return OPVCC(63, 14, 0, 0)
case AFCTIWCC: case AFCTIWCC:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment