Commit b4b2ddb8, authored by Lynn Boger, committed by Brad Fitzpatrick

math: improve sqrt for ppc64le,ppc64

The existing implementation uses code written in Go to
implement Sqrt; this adds the assembler to use the sqrt
instruction for Power and makes the necessary changes to
allow it to be inlined.

The following tests showed this relative improvement:

benchmark                 delta
BenchmarkSqrt             -97.91%
BenchmarkSqrtIndirect     -96.65%
BenchmarkSqrtGo           -35.93%
BenchmarkSqrtPrime        -96.94%

Fixes #14349

Change-Id: I8074f4dc63486e756587564ceb320aca300bf5fa
Reviewed-on: https://go-review.googlesource.com/19515
Reviewed-by: Minux Ma <minux@golang.org>
parent 6cb2e1d0
...@@ -677,7 +677,7 @@ opswitch: ...@@ -677,7 +677,7 @@ opswitch:
if n.Left.Op == ONAME && n.Left.Sym.Name == "Sqrt" && n.Left.Sym.Pkg.Path == "math" { if n.Left.Op == ONAME && n.Left.Sym.Name == "Sqrt" && n.Left.Sym.Pkg.Path == "math" {
switch Thearch.Thechar { switch Thearch.Thechar {
case '5', '6', '7': case '5', '6', '7', '9':
n.Op = OSQRT n.Op = OSQRT
n.Left = n.List.First() n.Left = n.List.First()
n.List.Set(nil) n.List.Set(nil)
......
...@@ -706,6 +706,7 @@ func optoas(op gc.Op, t *gc.Type) obj.As { ...@@ -706,6 +706,7 @@ func optoas(op gc.Op, t *gc.Type) obj.As {
OCMP_ = uint32(gc.OCMP) << 16 OCMP_ = uint32(gc.OCMP) << 16
OAS_ = uint32(gc.OAS) << 16 OAS_ = uint32(gc.OAS) << 16
OHMUL_ = uint32(gc.OHMUL) << 16 OHMUL_ = uint32(gc.OHMUL) << 16
OSQRT_ = uint32(gc.OSQRT) << 16
) )
a := obj.AXXX a := obj.AXXX
...@@ -1028,6 +1029,9 @@ func optoas(op gc.Op, t *gc.Type) obj.As { ...@@ -1028,6 +1029,9 @@ func optoas(op gc.Op, t *gc.Type) obj.As {
case ODIV_ | gc.TFLOAT64: case ODIV_ | gc.TFLOAT64:
a = ppc64.AFDIV a = ppc64.AFDIV
case OSQRT_ | gc.TFLOAT64:
a = ppc64.AFSQRT
} }
return a return a
......
...@@ -632,7 +632,8 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int { ...@@ -632,7 +632,8 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
ppc64.AFMOVD, ppc64.AFMOVD,
ppc64.AFRSP, ppc64.AFRSP,
ppc64.AFNEG, ppc64.AFNEG,
ppc64.AFNEGCC: ppc64.AFNEGCC,
ppc64.AFSQRT:
if s != nil { if s != nil {
if copysub(&p.From, v, s, true) { if copysub(&p.From, v, s, true) {
return 1 return 1
......
...@@ -73,6 +73,7 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{ ...@@ -73,6 +73,7 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{
ppc64.AFCFID & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite}, ppc64.AFCFID & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
ppc64.AFCMPU & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightRead}, ppc64.AFCMPU & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightRead},
ppc64.AFRSP & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv}, ppc64.AFRSP & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv},
ppc64.AFSQRT & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},
// Moves // Moves
ppc64.AMOVB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv}, ppc64.AMOVB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
......
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ppc64 ppc64le
#include "textflag.h"
// func Sqrt(x float64) float64
//
// Sqrt computes the square root of x with a single FSQRT instruction
// instead of branching to a Go implementation. The argument is read from
// offset 0 of the caller-provided frame and the result is written to
// offset 8. The routine is NOSPLIT and declares a local frame size of $0;
// F0 is the only register it uses.
TEXT ·Sqrt(SB),NOSPLIT,$0
// Load the float64 argument x into floating-point register F0.
FMOVD x+0(FP), F0
// Take the square root in place: F0 = sqrt(F0).
FSQRT F0, F0
// Store F0 into the return-value slot.
FMOVD F0, ret+8(FP)
RET
...@@ -84,8 +84,5 @@ TEXT ·Sin(SB),NOSPLIT,$0 ...@@ -84,8 +84,5 @@ TEXT ·Sin(SB),NOSPLIT,$0
TEXT ·Cos(SB),NOSPLIT,$0 TEXT ·Cos(SB),NOSPLIT,$0
BR ·cos(SB) BR ·cos(SB)
TEXT ·Sqrt(SB),NOSPLIT,$0
BR ·sqrt(SB)
TEXT ·Tan(SB),NOSPLIT,$0 TEXT ·Tan(SB),NOSPLIT,$0
BR ·tan(SB) BR ·tan(SB)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment