Commit 96cdacb9 authored by Michael Munday

cmd/asm, cmd/compile: optimize math.Abs and math.Copysign on s390x

This change adds three new instructions:

- LPDFR: load positive (math.Abs(x))
- LNDFR: load negative (-math.Abs(x))
- CPSDR: copy sign (math.Copysign(x, y))

By making use of GPR <-> FPR moves we can now compile math.Abs and
math.Copysign to these instructions using SSA rules.

This CL also adds new rules to merge address generation into combined
load operations. This makes GPR <-> FPR move matching more reliable.

name                 old time/op  new time/op  delta
Copysign             1.85ns ± 0%  1.40ns ± 1%  -24.65%  (p=0.000 n=8+10)
Abs                  1.58ns ± 1%  0.73ns ± 1%  -53.64%  (p=0.000 n=10+10)

The geo mean improvement for all math package benchmarks was 4.6%.

Change-Id: I0cec35c5c1b3fb45243bf666b56b57faca981bc9
Reviewed-on: https://go-review.googlesource.com/73950
Run-TryBot: Michael Munday <mike.munday@ibm.com>
Reviewed-by: Keith Randall <khr@golang.org>
parent 7fff1db0
...@@ -296,6 +296,9 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- ...@@ -296,6 +296,9 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
FMADDS F1, F2, F3 // b30e3012 FMADDS F1, F2, F3 // b30e3012
FMSUB F4, F5, F5 // b31f5045 FMSUB F4, F5, F5 // b31f5045
FMSUBS F6, F6, F7 // b30f7066 FMSUBS F6, F6, F7 // b30f7066
LPDFR F1, F2 // b3700021
LNDFR F3, F4 // b3710043
CPSDR F5, F6, F7 // b3725076
VL (R15), V1 // e710f0000006 VL (R15), V1 // e710f0000006
VST V1, (R15) // e710f000000e VST V1, (R15) // e710f000000e
......
...@@ -1691,6 +1691,70 @@ var linuxS390XTests = []*asmTest{ ...@@ -1691,6 +1691,70 @@ var linuxS390XTests = []*asmTest{
pos: []string{"\tMOV(B|BZ|D)\t[$]1,"}, pos: []string{"\tMOV(B|BZ|D)\t[$]1,"},
neg: []string{"\tCEBR\t", "\tMOV(B|BZ|D)\t[$]0,"}, neg: []string{"\tCEBR\t", "\tMOV(B|BZ|D)\t[$]0,"},
}, },
// math tests
{
fn: `
func $(x float64) float64 {
return math.Abs(x)
}
`,
pos: []string{"\tLPDFR\t"},
neg: []string{"\tMOVD\t"}, // no integer loads/stores
},
{
fn: `
func $(x float32) float32 {
return float32(math.Abs(float64(x)))
}
`,
pos: []string{"\tLPDFR\t"},
neg: []string{"\tLDEBR\t", "\tLEDBR\t"}, // no float64 conversion
},
{
fn: `
func $(x float64) float64 {
return math.Float64frombits(math.Float64bits(x)|1<<63)
}
`,
pos: []string{"\tLNDFR\t"},
neg: []string{"\tMOVD\t"}, // no integer loads/stores
},
{
fn: `
func $(x float64) float64 {
return -math.Abs(x)
}
`,
pos: []string{"\tLNDFR\t"},
neg: []string{"\tMOVD\t"}, // no integer loads/stores
},
{
fn: `
func $(x, y float64) float64 {
return math.Copysign(x, y)
}
`,
pos: []string{"\tCPSDR\t"},
neg: []string{"\tMOVD\t"}, // no integer loads/stores
},
{
fn: `
func $(x float64) float64 {
return math.Copysign(x, -1)
}
`,
pos: []string{"\tLNDFR\t"},
neg: []string{"\tMOVD\t"}, // no integer loads/stores
},
{
fn: `
func $(x float64) float64 {
return math.Copysign(-1, x)
}
`,
pos: []string{"\tCPSDR\t"},
neg: []string{"\tMOVD\t"}, // no integer loads/stores
},
} }
var linuxARMTests = []*asmTest{ var linuxARMTests = []*asmTest{
......
...@@ -214,6 +214,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -214,6 +214,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
default: default:
v.Fatalf("invalid FIDBR mask: %v", v.AuxInt) v.Fatalf("invalid FIDBR mask: %v", v.AuxInt)
} }
case ssa.OpS390XCPSDR:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
p.Reg = v.Args[0].Reg()
case ssa.OpS390XDIVD, ssa.OpS390XDIVW, case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU, ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
ssa.OpS390XMODD, ssa.OpS390XMODW, ssa.OpS390XMODD, ssa.OpS390XMODW,
...@@ -432,10 +435,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -432,10 +435,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
gc.AddAux2(&p.To, v, sc.Off()) gc.AddAux2(&p.To, v, sc.Off())
case ssa.OpS390XMOVBreg, ssa.OpS390XMOVHreg, ssa.OpS390XMOVWreg, case ssa.OpS390XMOVBreg, ssa.OpS390XMOVHreg, ssa.OpS390XMOVWreg,
ssa.OpS390XMOVBZreg, ssa.OpS390XMOVHZreg, ssa.OpS390XMOVWZreg, ssa.OpS390XMOVBZreg, ssa.OpS390XMOVHZreg, ssa.OpS390XMOVWZreg,
ssa.OpS390XLDGR, ssa.OpS390XLGDR,
ssa.OpS390XCEFBRA, ssa.OpS390XCDFBRA, ssa.OpS390XCEGBRA, ssa.OpS390XCDGBRA, ssa.OpS390XCEFBRA, ssa.OpS390XCDFBRA, ssa.OpS390XCEGBRA, ssa.OpS390XCDGBRA,
ssa.OpS390XCFEBRA, ssa.OpS390XCFDBRA, ssa.OpS390XCGEBRA, ssa.OpS390XCGDBRA, ssa.OpS390XCFEBRA, ssa.OpS390XCFDBRA, ssa.OpS390XCGEBRA, ssa.OpS390XCGDBRA,
ssa.OpS390XLDEBR, ssa.OpS390XLEDBR, ssa.OpS390XLDEBR, ssa.OpS390XLEDBR,
ssa.OpS390XFNEG, ssa.OpS390XFNEGS: ssa.OpS390XFNEG, ssa.OpS390XFNEGS,
ssa.OpS390XLPDFR, ssa.OpS390XLNDFR:
opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg()) opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
case ssa.OpS390XCLEAR: case ssa.OpS390XCLEAR:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
......
...@@ -688,10 +688,55 @@ ...@@ -688,10 +688,55 @@
(MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem) (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
// replace load from same location as preceding store with copy // replace load from same location as preceding store with copy
(MOVBZload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBZreg x) (MOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVDreg x)
(MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHZreg x) (MOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWreg x)
(MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWZreg x) (MOVHload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHreg x)
(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDreg x) (MOVBload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBreg x)
(MOVWZload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWZreg x)
(MOVHZload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHZreg x)
(MOVBZload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBZreg x)
(MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LGDR x)
(FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LDGR x)
(FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
(FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
// prefer FPR <-> GPR moves over combined load ops
(MULLDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (MULLD x (LGDR <t> y))
(ADDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (ADD x (LGDR <t> y))
(SUBload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (SUB x (LGDR <t> y))
(ORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (OR x (LGDR <t> y))
(ANDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (AND x (LGDR <t> y))
(XORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (XOR x (LGDR <t> y))
// detect attempts to set/clear the sign bit
// may need to be reworked when NIHH/OIHH are added
(SRDconst [1] (SLDconst [1] (LGDR <t> x))) -> (LGDR <t> (LPDFR <x.Type> x))
(LDGR <t> (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR <t> x))
(OR (MOVDconst [-1<<63]) (LGDR <t> x)) -> (LGDR <t> (LNDFR <x.Type> x))
(LDGR <t> (OR (MOVDconst [-1<<63]) x)) -> (LNDFR (LDGR <t> x))
// detect attempts to set the sign bit with load
(LDGR <t> x:(ORload <t1> [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (LNDFR <t> (LDGR <t> (MOVDload <t1> [off] {sym} ptr mem)))
// detect copysign
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
// Copysign where one operand is a constant c (the raw IEEE 754 bits).
// NOTE: the sign-bit mask must be parenthesized. In Go, & and << have the same
// precedence and associate left to right, so "c & -1<<63" parses as "(c & -1) << 63",
// which tests bit 0 of c instead of the sign bit (bit 63).
// Constant magnitude with a cleared sign bit: take the sign from x.
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & (-1<<63) == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
// Constant sign source: the result sign is known, so force it with LPDFR/LNDFR.
(CPSDR y (FMOVDconst [c])) && c & (-1<<63) == 0 -> (LPDFR y)
(CPSDR y (FMOVDconst [c])) && c & (-1<<63) != 0 -> (LNDFR y)
// absorb negations into set/clear sign bit
(FNEG (LPDFR x)) -> (LNDFR x)
(FNEG (LNDFR x)) -> (LPDFR x)
(FNEGS (LPDFR x)) -> (LNDFR x)
(FNEGS (LNDFR x)) -> (LPDFR x)
// no need to convert float32 to float64 to set/clear sign bit
(LEDBR (LPDFR (LDEBR x))) -> (LPDFR x)
(LEDBR (LNDFR (LDEBR x))) -> (LNDFR x)
// remove unnecessary FPR <-> GPR moves
(LDGR (LGDR x)) -> x
(LGDR (LDGR x)) -> (MOVDreg x)
// Don't extend before storing // Don't extend before storing
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
...@@ -723,6 +768,20 @@ ...@@ -723,6 +768,20 @@
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem) (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem) (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
(ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload [off1+off2] {sym} x ptr mem)
(ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload [off1+off2] {sym} x ptr mem)
(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem)
(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem)
(SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload [off1+off2] {sym} x ptr mem)
(SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload [off1+off2] {sym} x ptr mem)
(ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload [off1+off2] {sym} x ptr mem)
(ANDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload [off1+off2] {sym} x ptr mem)
(ORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload [off1+off2] {sym} x ptr mem)
(ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload [off1+off2] {sym} x ptr mem)
(XORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload [off1+off2] {sym} x ptr mem)
(XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload [off1+off2] {sym} x ptr mem)
// Fold constants into stores. // Fold constants into stores.
(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB -> (MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
(MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem) (MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
...@@ -780,6 +839,20 @@ ...@@ -780,6 +839,20 @@
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(XORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
// Cannot store constant to SB directly (no 'move relative long immediate' instructions). // Cannot store constant to SB directly (no 'move relative long immediate' instructions).
(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> (MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
(MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) (MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
......
...@@ -205,6 +205,9 @@ func init() { ...@@ -205,6 +205,9 @@ func init() {
{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true}, // fp64 arg1 * arg2 + arg0 {name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true}, // fp64 arg1 * arg2 + arg0
{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0 {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0
{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0 {name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0
{name: "LPDFR", argLength: 1, reg: fp11, asm: "LPDFR"}, // fp64/fp32 clear sign bit (load positive, i.e. math.Abs(arg0))
{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 set sign bit (load negative, i.e. -math.Abs(arg0))
{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0 (result is arg0 with the sign of arg1)
// Round to integer, float64 only. // Round to integer, float64 only.
// //
...@@ -357,6 +360,8 @@ func init() { ...@@ -357,6 +360,8 @@ func init() {
{name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint {name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
{name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"}, // move int64 to float64 (no conversion)
{name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"}, // move float64 to int64 (no conversion)
{name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA"}, // convert float64 to int32 {name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA"}, // convert float64 to int32
{name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA"}, // convert float64 to int64 {name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA"}, // convert float64 to int64
{name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA"}, // convert float32 to int32 {name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA"}, // convert float32 to int32
......
...@@ -1505,6 +1505,9 @@ const ( ...@@ -1505,6 +1505,9 @@ const (
OpS390XFMADD OpS390XFMADD
OpS390XFMSUBS OpS390XFMSUBS
OpS390XFMSUB OpS390XFMSUB
OpS390XLPDFR
OpS390XLNDFR
OpS390XCPSDR
OpS390XFIDBR OpS390XFIDBR
OpS390XFMOVSload OpS390XFMOVSload
OpS390XFMOVDload OpS390XFMOVDload
...@@ -1610,6 +1613,8 @@ const ( ...@@ -1610,6 +1613,8 @@ const (
OpS390XMOVDreg OpS390XMOVDreg
OpS390XMOVDnop OpS390XMOVDnop
OpS390XMOVDconst OpS390XMOVDconst
OpS390XLDGR
OpS390XLGDR
OpS390XCFDBRA OpS390XCFDBRA
OpS390XCGDBRA OpS390XCGDBRA
OpS390XCFEBRA OpS390XCFEBRA
...@@ -19385,6 +19390,46 @@ var opcodeTable = [...]opInfo{ ...@@ -19385,6 +19390,46 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "LPDFR",
argLen: 1,
asm: s390x.ALPDFR,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "LNDFR",
argLen: 1,
asm: s390x.ALNDFR,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "CPSDR",
argLen: 2,
asm: s390x.ACPSDR,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{ {
name: "FIDBR", name: "FIDBR",
auxType: auxInt8, auxType: auxInt8,
...@@ -20950,6 +20995,32 @@ var opcodeTable = [...]opInfo{ ...@@ -20950,6 +20995,32 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "LDGR",
argLen: 1,
asm: s390x.ALDGR,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "LGDR",
argLen: 1,
asm: s390x.ALGDR,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{ {
name: "CFDBRA", name: "CFDBRA",
argLen: 1, argLen: 1,
......
...@@ -283,12 +283,15 @@ const ( ...@@ -283,12 +283,15 @@ const (
AFNEGS AFNEGS
ALEDBR ALEDBR
ALDEBR ALDEBR
ALPDFR
ALNDFR
AFSUB AFSUB
AFSUBS AFSUBS
AFSQRT AFSQRT
AFSQRTS AFSQRTS
AFIEBR AFIEBR
AFIDBR AFIDBR
ACPSDR
// move from GPR to FPR and vice versa // move from GPR to FPR and vice versa
ALDGR ALDGR
......
...@@ -81,12 +81,15 @@ var Anames = []string{ ...@@ -81,12 +81,15 @@ var Anames = []string{
"FNEGS", "FNEGS",
"LEDBR", "LEDBR",
"LDEBR", "LDEBR",
"LPDFR",
"LNDFR",
"FSUB", "FSUB",
"FSUBS", "FSUBS",
"FSQRT", "FSQRT",
"FSQRTS", "FSQRTS",
"FIEBR", "FIEBR",
"FIDBR", "FIDBR",
"CPSDR",
"LDGR", "LDGR",
"LGDR", "LGDR",
"CEFBRA", "CEFBRA",
......
...@@ -212,6 +212,7 @@ var optab = []Optab{ ...@@ -212,6 +212,7 @@ var optab = []Optab{
Optab{ACEFBRA, C_REG, C_NONE, C_NONE, C_FREG, 82, 0}, Optab{ACEFBRA, C_REG, C_NONE, C_NONE, C_FREG, 82, 0},
Optab{ACFEBRA, C_FREG, C_NONE, C_NONE, C_REG, 83, 0}, Optab{ACFEBRA, C_FREG, C_NONE, C_NONE, C_REG, 83, 0},
Optab{AFIEBR, C_SCON, C_FREG, C_NONE, C_FREG, 48, 0}, Optab{AFIEBR, C_SCON, C_FREG, C_NONE, C_FREG, 48, 0},
Optab{ACPSDR, C_FREG, C_FREG, C_NONE, C_FREG, 49, 0},
// load symbol address (plus offset) // load symbol address (plus offset)
Optab{AMOVD, C_SYMADDR, C_NONE, C_NONE, C_REG, 19, 0}, Optab{AMOVD, C_SYMADDR, C_NONE, C_NONE, C_REG, 19, 0},
...@@ -897,6 +898,8 @@ func buildop(ctxt *obj.Link) { ...@@ -897,6 +898,8 @@ func buildop(ctxt *obj.Link) {
opset(ABCL, r) opset(ABCL, r)
case AFABS: case AFABS:
opset(AFNABS, r) opset(AFNABS, r)
opset(ALPDFR, r)
opset(ALNDFR, r)
opset(AFNEG, r) opset(AFNEG, r)
opset(AFNEGS, r) opset(AFNEGS, r)
opset(ALEDBR, r) opset(ALEDBR, r)
...@@ -3182,6 +3185,10 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { ...@@ -3182,6 +3185,10 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
opcode = op_LPDBR opcode = op_LPDBR
case AFNABS: case AFNABS:
opcode = op_LNDBR opcode = op_LNDBR
case ALPDFR:
opcode = op_LPDFR
case ALNDFR:
opcode = op_LNDFR
case AFNEG: case AFNEG:
opcode = op_LCDFR opcode = op_LCDFR
case AFNEGS: case AFNEGS:
...@@ -3281,6 +3288,9 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { ...@@ -3281,6 +3288,9 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
} }
zRRF(opcode, uint32(m3), 0, uint32(p.To.Reg), uint32(p.Reg), asm) zRRF(opcode, uint32(m3), 0, uint32(p.To.Reg), uint32(p.Reg), asm)
case 49: // copysign
zRRF(op_CPSDR, uint32(p.From.Reg), 0, uint32(p.To.Reg), uint32(p.Reg), asm)
case 67: // fmov $0 freg case 67: // fmov $0 freg
var opcode uint32 var opcode uint32
switch p.As { switch p.As {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment