Commit ac2ceba0 authored by Ruixin(Peter) Bao's avatar Ruixin(Peter) Bao Committed by Michael Munday

cmd/compile/internal/gc: intrinsify mulWW on s390x

SSA rule have already been added previously to intrisinfy Mul/Mul64 on s390x. In this CL,
we want to let mulWW use that SSA rule as well. Also removed an extra line for formatting.

Benchmarks:
QuoRem-18                            3.59µs ±15%    2.94µs ± 3%  -18.06%  (p=0.000 n=8+8)
ModSqrt225_Tonelli-18                 806µs ± 0%     800µs ± 0%   -0.85%  (p=0.000 n=7+8)
ModSqrt225_3Mod4-18                   245µs ± 1%     243µs ± 0%   -0.81%  (p=0.001 n=8+8)
ModSqrt231_Tonelli-18                 837µs ± 0%     834µs ± 1%   -0.36%  (p=0.028 n=8+8)
ModSqrt231_5Mod8-18                   282µs ± 0%     280µs ± 0%   -0.76%  (p=0.000 n=8+8)
Sqrt-18                              45.8µs ± 2%    38.6µs ± 0%  -15.63%  (p=0.000 n=8+8)
IntSqr/1-18                          19.1ns ± 0%    13.1ns ± 0%  -31.41%  (p=0.000 n=8+8)
IntSqr/2-18                          48.3ns ± 2%    48.2ns ± 0%     ~     (p=0.094 n=8+8)
IntSqr/3-18                          70.5ns ± 1%    70.7ns ± 0%     ~     (p=0.428 n=8+8)
IntSqr/5-18                           119ns ± 1%     118ns ± 0%   -1.02%  (p=0.000 n=7+8)
IntSqr/8-18                           215ns ± 1%     215ns ± 0%     ~     (p=0.320 n=8+7)
IntSqr/10-18                          302ns ± 1%     301ns ± 0%     ~     (p=0.148 n=8+7)
IntSqr/20-18                          952ns ± 1%     807ns ± 0%  -15.28%  (p=0.000 n=8+8)
IntSqr/30-18                         1.74µs ± 0%    1.53µs ± 0%  -11.93%  (p=0.000 n=8+8)
IntSqr/50-18                         3.91µs ± 0%    3.57µs ± 0%   -8.64%  (p=0.000 n=7+8)
IntSqr/80-18                         8.66µs ± 1%    8.11µs ± 0%   -6.39%  (p=0.000 n=8+8)
IntSqr/100-18                        12.8µs ± 0%    12.2µs ± 0%   -5.19%  (p=0.000 n=8+8)
IntSqr/200-18                        46.0µs ± 0%    44.5µs ± 0%   -3.06%  (p=0.000 n=8+8)
IntSqr/300-18                        81.4µs ± 0%    78.4µs ± 0%   -3.71%  (p=0.000 n=7+8)
IntSqr/500-18                         212µs ± 1%     206µs ± 0%   -2.66%  (p=0.000 n=8+8)
IntSqr/800-18                         419µs ± 1%     406µs ± 0%   -3.07%  (p=0.000 n=8+8)
IntSqr/1000-18                        635µs ± 0%     621µs ± 0%   -2.13%  (p=0.000 n=8+8)

Change-Id: Ib097857186932b902601ab087cbeff3fc9555c3e
Reviewed-on: https://go-review.googlesource.com/c/go/+/197639
Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 9c2e7e8b
......@@ -3671,7 +3671,7 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
},
sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64LE, sys.ArchPPC64)
sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64LE, sys.ArchPPC64, sys.ArchS390X)
add("math/big", "divWW",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
......
......@@ -237,7 +237,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Reg = r0
p.To.Reg = s390x.REG_R2
p.To.Type = obj.TYPE_REG
case ssa.OpS390XFMADD, ssa.OpS390XFMADDS,
ssa.OpS390XFMSUB, ssa.OpS390XFMSUBS:
r := v.Reg()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment