Commit 88672de7 authored by Bill O'Farrell's avatar Bill O'Farrell Committed by Michael Munday

math: use SIMD to accelerate additional scalar math functions on s390x

As necessary, math functions were structured to use stubs, so that they can
be accelerated with assembly on any platform.

Technique used was minimax polynomial approximation using tables of
polynomial coefficients, with argument range reduction.

Benchmark         New     Old     Speedup
BenchmarkAcos     12.2    47.5    3.89
BenchmarkAcosh    18.5    56.2    3.04
BenchmarkAsin     13.1    40.6    3.10
BenchmarkAsinh    19.4    62.8    3.24
BenchmarkAtan     10.1    23      2.28
BenchmarkAtanh    19.1    53.2    2.79
BenchmarkAtan2    16.5    33.9    2.05
BenchmarkCbrt     14.8    58      3.92
BenchmarkErf      10.8    20.1    1.86
BenchmarkErfc     11.2    23.5    2.10
BenchmarkExp      8.77    53.8    6.13
BenchmarkExpm1    10.1    38.3    3.79
BenchmarkLog      13.1    40.1    3.06
BenchmarkLog1p    12.7    38.3    3.02
BenchmarkPowInt   31.7    40.5    1.28
BenchmarkPowFrac  33.1    141     4.26
BenchmarkTan      11.5    30      2.61

Accuracy was tested against a high precision
reference function to determine maximum error.
Note: ulperr is error in "units in the last place"

       max
      ulperr
Acos  1.15
Acosh 1.07
Asin  2.22
Asinh 1.72
Atan  1.41
Atanh 3.00
Atan2 1.45
Cbrt  1.18
Erf   1.29
Erfc  4.82
Exp   1.00
Expm1 2.26
Log   0.94
Log1p 2.39
Tan   3.14

Pow will have 99.99% correctly rounded results with reasonable inputs
producing numeric (non Inf or NaN) results

Change-Id: I850e8cf7b70426e8b54ec49d74acd4cddc8c6cb2
Reviewed-on: https://go-review.googlesource.com/38585Reviewed-by: default avatarMichael Munday <munday@ca.ibm.com>
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 8c49c06b
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·acosrodataL13<> + 0(SB)/8, $0.314159265358979323E+01 //pi
DATA ·acosrodataL13<> + 8(SB)/8, $-0.0
DATA ·acosrodataL13<> + 16(SB)/8, $0x7ff8000000000000 //Nan
DATA ·acosrodataL13<> + 24(SB)/8, $-1.0
DATA ·acosrodataL13<> + 32(SB)/8, $1.0
DATA ·acosrodataL13<> + 40(SB)/8, $0.166666666666651626E+00
DATA ·acosrodataL13<> + 48(SB)/8, $0.750000000042621169E-01
DATA ·acosrodataL13<> + 56(SB)/8, $0.446428567178116477E-01
DATA ·acosrodataL13<> + 64(SB)/8, $0.303819660378071894E-01
DATA ·acosrodataL13<> + 72(SB)/8, $0.223715011892010405E-01
DATA ·acosrodataL13<> + 80(SB)/8, $0.173659424522364952E-01
DATA ·acosrodataL13<> + 88(SB)/8, $0.137810186504372266E-01
DATA ·acosrodataL13<> + 96(SB)/8, $0.134066870961173521E-01
DATA ·acosrodataL13<> + 104(SB)/8, $-.412335502831898721E-02
DATA ·acosrodataL13<> + 112(SB)/8, $0.867383739532082719E-01
DATA ·acosrodataL13<> + 120(SB)/8, $-.328765950607171649E+00
DATA ·acosrodataL13<> + 128(SB)/8, $0.110401073869414626E+01
DATA ·acosrodataL13<> + 136(SB)/8, $-.270694366992537307E+01
DATA ·acosrodataL13<> + 144(SB)/8, $0.500196500770928669E+01
DATA ·acosrodataL13<> + 152(SB)/8, $-.665866959108585165E+01
DATA ·acosrodataL13<> + 160(SB)/8, $-.344895269334086578E+01
DATA ·acosrodataL13<> + 168(SB)/8, $0.927437952918301659E+00
DATA ·acosrodataL13<> + 176(SB)/8, $0.610487478874645653E+01
DATA ·acosrodataL13<> + 184(SB)/8, $0.157079632679489656e+01
DATA ·acosrodataL13<> + 192(SB)/8, $0.0
GLOBL ·acosrodataL13<> + 0(SB), RODATA, $200
// Acos returns the arccosine, in radians, of the argument.
//
// Special case is:
// Acos(x) = NaN if x < -1 or x > 1
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·acosAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD acosrodataL13<>+0(SB), R9
WORD $0xB3CD00C0 //lgdr %r12, %f0
FMOVD F0, F10
SRAD $32, R12
WORD $0xC0293FE6 //iilf %r2,1072079005
BYTE $0xA0
BYTE $0x9D
WORD $0xB917001C //llgtr %r1,%r12
CMPW R1,R2
BGT L2
FMOVD 192(R9), F8
FMADD F0, F0, F8
FMOVD 184(R9), F1
L3:
WFMDB V8, V8, V2
FMOVD 176(R9), F6
FMOVD 168(R9), F0
FMOVD 160(R9), F4
WFMADB V2, V0, V6, V0
FMOVD 152(R9), F6
WFMADB V2, V4, V6, V4
FMOVD 144(R9), F6
WFMADB V2, V0, V6, V0
FMOVD 136(R9), F6
WFMADB V2, V4, V6, V4
FMOVD 128(R9), F6
WFMADB V2, V0, V6, V0
FMOVD 120(R9), F6
WFMADB V2, V4, V6, V4
FMOVD 112(R9), F6
WFMADB V2, V0, V6, V0
FMOVD 104(R9), F6
WFMADB V2, V4, V6, V4
FMOVD 96(R9), F6
WFMADB V2, V0, V6, V0
FMOVD 88(R9), F6
WFMADB V2, V4, V6, V4
FMOVD 80(R9), F6
WFMADB V2, V0, V6, V0
FMOVD 72(R9), F6
WFMADB V2, V4, V6, V4
FMOVD 64(R9), F6
WFMADB V2, V0, V6, V0
FMOVD 56(R9), F6
WFMADB V2, V4, V6, V4
FMOVD 48(R9), F6
WFMADB V2, V0, V6, V0
FMOVD 40(R9), F6
WFMADB V2, V4, V6, V2
FMOVD 192(R9), F4
WFMADB V8, V0, V2, V0
WFMADB V10, V8, V4, V8
FMADD F0, F8, F10
WFSDB V10, V1, V10
L1:
FMOVD F10, ret+8(FP)
RET
L2:
WORD $0xC0293FEF //iilf %r2,1072693247
BYTE $0xFF
BYTE $0xFF
CMPW R1, R2
BLE L12
L4:
WORD $0xED009020 //cdb %f0,.L34-.L13(%r9)
BYTE $0x00
BYTE $0x19
BEQ L8
WORD $0xED009018 //cdb %f0,.L35-.L13(%r9)
BYTE $0x00
BYTE $0x19
BEQ L9
WFCEDBS V10, V10, V0
BVS L1
FMOVD 16(R9), F10
BR L1
L12:
FMOVD 24(R9), F0
FMADD F10, F10, F0
WORD $0xB3130080 //lcdbr %f8,%f0
WORD $0xED009008 //cdb %f0,.L37-.L13(%r9)
BYTE $0x00
BYTE $0x19
FSQRT F8, F10
L5:
MOVW R12, R4
CMPBLE R4, $0, L7
WORD $0xB31300AA //lcdbr %f10,%f10
FMOVD $0, F1
BR L3
L9:
FMOVD 0(R9), F10
BR L1
L8:
FMOVD $0, F0
FMOVD F0, ret+8(FP)
RET
L7:
FMOVD 0(R9), F1
BR L3
......@@ -39,7 +39,9 @@ package math
// Acosh(+Inf) = +Inf
// Acosh(x) = NaN if x < 1
// Acosh(NaN) = NaN
func Acosh(x float64) float64 {
func Acosh(x float64) float64
func acosh(x float64) float64 {
const (
Ln2 = 6.93147180559945286227e-01 // 0x3FE62E42FEFA39EF
Large = 1 << 28 // 2**28
......
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·acoshrodataL11<> + 0(SB)/8, $-1.0
DATA ·acoshrodataL11<> + 8(SB)/8, $.41375273347623353626
DATA ·acoshrodataL11<> + 16(SB)/8, $.51487302528619766235E+04
DATA ·acoshrodataL11<> + 24(SB)/8, $-1.67526912689208984375
DATA ·acoshrodataL11<> + 32(SB)/8, $0.181818181818181826E+00
DATA ·acoshrodataL11<> + 40(SB)/8, $-.165289256198351540E-01
DATA ·acoshrodataL11<> + 48(SB)/8, $0.200350613573012186E-02
DATA ·acoshrodataL11<> + 56(SB)/8, $-.273205381970859341E-03
DATA ·acoshrodataL11<> + 64(SB)/8, $0.397389654305194527E-04
DATA ·acoshrodataL11<> + 72(SB)/8, $0.938370938292558173E-06
DATA ·acoshrodataL11<> + 80(SB)/8, $-.602107458843052029E-05
DATA ·acoshrodataL11<> + 88(SB)/8, $0.212881813645679599E-07
DATA ·acoshrodataL11<> + 96(SB)/8, $-.148682720127920854E-06
DATA ·acoshrodataL11<> + 104(SB)/8, $-5.5
DATA ·acoshrodataL11<> + 112(SB)/8, $0x7ff8000000000000 //Nan
GLOBL ·acoshrodataL11<> + 0(SB), RODATA, $120
// Table of log correction terms
DATA ·acoshtab2068<> + 0(SB)/8, $0.585235384085551248E-01
DATA ·acoshtab2068<> + 8(SB)/8, $0.412206153771168640E-01
DATA ·acoshtab2068<> + 16(SB)/8, $0.273839003221648339E-01
DATA ·acoshtab2068<> + 24(SB)/8, $0.166383778368856480E-01
DATA ·acoshtab2068<> + 32(SB)/8, $0.866678223433169637E-02
DATA ·acoshtab2068<> + 40(SB)/8, $0.319831684989627514E-02
DATA ·acoshtab2068<> + 48(SB)/8, $0.0
DATA ·acoshtab2068<> + 56(SB)/8, $-.113006378583725549E-02
DATA ·acoshtab2068<> + 64(SB)/8, $-.367979419636602491E-03
DATA ·acoshtab2068<> + 72(SB)/8, $0.213172484510484979E-02
DATA ·acoshtab2068<> + 80(SB)/8, $0.623271047682013536E-02
DATA ·acoshtab2068<> + 88(SB)/8, $0.118140812789696885E-01
DATA ·acoshtab2068<> + 96(SB)/8, $0.187681358930914206E-01
DATA ·acoshtab2068<> + 104(SB)/8, $0.269985148668178992E-01
DATA ·acoshtab2068<> + 112(SB)/8, $0.364186619761331328E-01
DATA ·acoshtab2068<> + 120(SB)/8, $0.469505379381388441E-01
GLOBL ·acoshtab2068<> + 0(SB), RODATA, $128
// Acosh returns the inverse hyperbolic cosine of the argument.
//
// Special cases are:
// Acosh(+Inf) = +Inf
// Acosh(x) = NaN if x < 1
// Acosh(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·acoshAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD acoshrodataL11<>+0(SB), R9
WORD $0xB3CD0010 //lgdr %r1, %f0
WORD $0xC0295FEF //iilf %r2,1609564159
BYTE $0xFF
BYTE $0xFF
SRAD $32, R1
CMPW R1, R2
BGT L2
WORD $0xC0293FEF //iilf %r2,1072693247
BYTE $0xFF
BYTE $0xFF
CMPW R1, R2
BGT L10
L3:
WFCEDBS V0, V0, V2
BVS L1
FMOVD 112(R9), F0
L1:
FMOVD F0, ret+8(FP)
RET
L2:
WORD $0xC0297FEF //iilf %r2,2146435071
BYTE $0xFF
BYTE $0xFF
MOVW R1, R6
MOVW R2, R7
CMPBGT R6, R7, L1
FMOVD F0, F8
FMOVD $0, F0
WFADB V0, V8, V0
WORD $0xC0398006 //iilf %r3,2147909631
BYTE $0x7F
BYTE $0xFF
WORD $0xB3CD0050 //lgdr %r5, %f0
SRAD $32, R5
MOVH $0x0, R1
SUBW R5, R3
FMOVD $0, F10
WORD $0xEC4320AF //risbg %r4,%r3,32,128+47,0
BYTE $0x00
BYTE $0x55
WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,64-13
BYTE $0x33
BYTE $0x55
BYTE $0x18 //lr %r2,%r4
BYTE $0x24
WORD $0xEC14001F //risbgn %r1,%r4,64-64+0,64-64+0+32-1,64-0-32
BYTE $0x20
BYTE $0x59
SUBW $0x100000, R2
SRAW $8, R2, R2
ORW $0x45000000, R2
L5:
WORD $0xB3C10001 //ldgr %f0,%r1
FMOVD 104(R9), F2
FMADD F8, F0, F2
FMOVD 96(R9), F4
WFMADB V10, V0, V2, V0
FMOVD 88(R9), F6
FMOVD 80(R9), F2
WFMADB V0, V6, V4, V6
FMOVD 72(R9), F1
WFMDB V0, V0, V4
WFMADB V0, V1, V2, V1
FMOVD 64(R9), F2
WFMADB V6, V4, V1, V6
FMOVD 56(R9), F1
WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,0
BYTE $0x00
BYTE $0x55
WFMADB V0, V2, V1, V2
FMOVD 48(R9), F1
WFMADB V4, V6, V2, V6
FMOVD 40(R9), F2
WFMADB V0, V1, V2, V1
VLVGF $0, R2, V2
WFMADB V4, V6, V1, V4
LDEBR F2, F2
FMOVD 32(R9), F6
WFMADB V0, V4, V6, V4
FMOVD 24(R9), F1
FMOVD 16(R9), F6
MOVD acoshtab2068<>+0(SB), R1
WFMADB V2, V1, V6, V2
FMOVD 0(R3)(R1*1), F3
WFMADB V0, V4, V3, V0
FMOVD 8(R9), F4
FMADD F4, F2, F0
FMOVD F0, ret+8(FP)
RET
L10:
FMOVD F0, F8
FMOVD 0(R9), F0
FMADD F8, F8, F0
WORD $0xB3120000 //ltdbr %f0,%f0
FSQRT F0, F10
L4:
WFADB V10, V8, V0
WORD $0xC0398006 //iilf %r3,2147909631
BYTE $0x7F
BYTE $0xFF
WORD $0xB3CD0050 //lgdr %r5, %f0
SRAD $32, R5
MOVH $0x0, R1
SUBW R5, R3
SRAW $8, R3, R2
WORD $0xEC4320AF //risbg %r4,%r3,32,128+47,0
BYTE $0x00
BYTE $0x55
ANDW $0xFFFFFF00, R2
WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,64-13
BYTE $0x33
BYTE $0x55
ORW $0x45000000, R2
WORD $0xEC14001F //risbgn %r1,%r4,64-64+0,64-64+0+32-1,64-0-32
BYTE $0x20
BYTE $0x59
BR L5
......@@ -22,6 +22,54 @@ func sinhAsm(x float64) float64
func tanhTrampolineSetup(x float64) float64
func tanhAsm(x float64) float64
func log1pTrampolineSetup(x float64) float64
func log1pAsm(x float64) float64
func atanhTrampolineSetup(x float64) float64
func atanhAsm(x float64) float64
func acosTrampolineSetup(x float64) float64
func acosAsm(x float64) float64
func acoshTrampolineSetup(x float64) float64
func acoshAsm(x float64) float64
func asinTrampolineSetup(x float64) float64
func asinAsm(x float64) float64
func asinhTrampolineSetup(x float64) float64
func asinhAsm(x float64) float64
func erfTrampolineSetup(x float64) float64
func erfAsm(x float64) float64
func erfcTrampolineSetup(x float64) float64
func erfcAsm(x float64) float64
func atanTrampolineSetup(x float64) float64
func atanAsm(x float64) float64
func atan2TrampolineSetup(x, y float64) float64
func atan2Asm(x, y float64) float64
func cbrtTrampolineSetup(x float64) float64
func cbrtAsm(x float64) float64
func logTrampolineSetup(x float64) float64
func logAsm(x float64) float64
func tanTrampolineSetup(x float64) float64
func tanAsm(x float64) float64
func expTrampolineSetup(x float64) float64
func expAsm(x float64) float64
func expm1TrampolineSetup(x float64) float64
func expm1Asm(x float64) float64
func powTrampolineSetup(x, y float64) float64
func powAsm(x, y float64) float64
// hasVectorFacility reports whether the machine has the z/Architecture
// vector facility installed and enabled.
func hasVectorFacility() bool
......
......@@ -106,6 +106,37 @@ func TestLargeSinNovec(t *testing.T) {
}
}
func TestLargeTanNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
large := float64(100000 * Pi)
for i := 0; i < len(vf); i++ {
f1 := tanLarge[i]
f2 := TanNovec(vf[i] + large)
if !close(f1, f2) {
t.Errorf("Tan(%g) = %g, want %g", vf[i]+large, f2, f1)
}
}
}
func TestTanNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
if f := TanNovec(vf[i]); !veryclose(tan[i], f) {
t.Errorf("Tan(%g) = %g, want %g", vf[i], f, tan[i])
}
}
// same special cases as Sin
for i := 0; i < len(vfsinSC); i++ {
if f := TanNovec(vfsinSC[i]); !alike(sinSC[i], f) {
t.Errorf("Tan(%g) = %g, want %g", vfsinSC[i], f, sinSC[i])
}
}
}
func TestTanhNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
......@@ -142,3 +173,270 @@ func TestLog10Novec(t *testing.T) {
}
}
}
func TestLog1pNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := vf[i] / 100
if f := Log1pNovec(a); !veryclose(log1p[i], f) {
t.Errorf("Log1p(%g) = %g, want %g", a, f, log1p[i])
}
}
a := 9.0
if f := Log1pNovec(a); f != Ln10 {
t.Errorf("Log1p(%g) = %g, want %g", a, f, Ln10)
}
for i := 0; i < len(vflogSC); i++ {
if f := Log1pNovec(vflog1pSC[i]); !alike(log1pSC[i], f) {
t.Errorf("Log1p(%g) = %g, want %g", vflog1pSC[i], f, log1pSC[i])
}
}
}
func TestAtanhNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := vf[i] / 10
if f := AtanhNovec(a); !veryclose(atanh[i], f) {
t.Errorf("Atanh(%g) = %g, want %g", a, f, atanh[i])
}
}
for i := 0; i < len(vfatanhSC); i++ {
if f := AtanhNovec(vfatanhSC[i]); !alike(atanhSC[i], f) {
t.Errorf("Atanh(%g) = %g, want %g", vfatanhSC[i], f, atanhSC[i])
}
}
}
func TestAcosNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := vf[i] / 10
if f := AcosNovec(a); !close(acos[i], f) {
t.Errorf("Acos(%g) = %g, want %g", a, f, acos[i])
}
}
for i := 0; i < len(vfacosSC); i++ {
if f := AcosNovec(vfacosSC[i]); !alike(acosSC[i], f) {
t.Errorf("Acos(%g) = %g, want %g", vfacosSC[i], f, acosSC[i])
}
}
}
func TestAsinNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := vf[i] / 10
if f := AsinNovec(a); !veryclose(asin[i], f) {
t.Errorf("Asin(%g) = %g, want %g", a, f, asin[i])
}
}
for i := 0; i < len(vfasinSC); i++ {
if f := AsinNovec(vfasinSC[i]); !alike(asinSC[i], f) {
t.Errorf("Asin(%g) = %g, want %g", vfasinSC[i], f, asinSC[i])
}
}
}
func TestAcoshNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := 1 + Abs(vf[i])
if f := AcoshNovec(a); !veryclose(acosh[i], f) {
t.Errorf("Acosh(%g) = %g, want %g", a, f, acosh[i])
}
}
for i := 0; i < len(vfacoshSC); i++ {
if f := AcoshNovec(vfacoshSC[i]); !alike(acoshSC[i], f) {
t.Errorf("Acosh(%g) = %g, want %g", vfacoshSC[i], f, acoshSC[i])
}
}
}
func TestAsinhNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
if f := AsinhNovec(vf[i]); !veryclose(asinh[i], f) {
t.Errorf("Asinh(%g) = %g, want %g", vf[i], f, asinh[i])
}
}
for i := 0; i < len(vfasinhSC); i++ {
if f := AsinhNovec(vfasinhSC[i]); !alike(asinhSC[i], f) {
t.Errorf("Asinh(%g) = %g, want %g", vfasinhSC[i], f, asinhSC[i])
}
}
}
func TestErfNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := vf[i] / 10
if f := ErfNovec(a); !veryclose(erf[i], f) {
t.Errorf("Erf(%g) = %g, want %g", a, f, erf[i])
}
}
for i := 0; i < len(vferfSC); i++ {
if f := ErfNovec(vferfSC[i]); !alike(erfSC[i], f) {
t.Errorf("Erf(%g) = %g, want %g", vferfSC[i], f, erfSC[i])
}
}
}
func TestErfcNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := vf[i] / 10
if f := ErfcNovec(a); !veryclose(erfc[i], f) {
t.Errorf("Erfc(%g) = %g, want %g", a, f, erfc[i])
}
}
for i := 0; i < len(vferfcSC); i++ {
if f := ErfcNovec(vferfcSC[i]); !alike(erfcSC[i], f) {
t.Errorf("Erfc(%g) = %g, want %g", vferfcSC[i], f, erfcSC[i])
}
}
}
func TestAtanNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
if f := AtanNovec(vf[i]); !veryclose(atan[i], f) {
t.Errorf("Atan(%g) = %g, want %g", vf[i], f, atan[i])
}
}
for i := 0; i < len(vfatanSC); i++ {
if f := AtanNovec(vfatanSC[i]); !alike(atanSC[i], f) {
t.Errorf("Atan(%g) = %g, want %g", vfatanSC[i], f, atanSC[i])
}
}
}
func TestAtan2Novec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
if f := Atan2Novec(10, vf[i]); !veryclose(atan2[i], f) {
t.Errorf("Atan2(10, %g) = %g, want %g", vf[i], f, atan2[i])
}
}
for i := 0; i < len(vfatan2SC); i++ {
if f := Atan2Novec(vfatan2SC[i][0], vfatan2SC[i][1]); !alike(atan2SC[i], f) {
t.Errorf("Atan2(%g, %g) = %g, want %g", vfatan2SC[i][0], vfatan2SC[i][1], f, atan2SC[i])
}
}
}
func TestCbrtNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
if f := CbrtNovec(vf[i]); !veryclose(cbrt[i], f) {
t.Errorf("Cbrt(%g) = %g, want %g", vf[i], f, cbrt[i])
}
}
for i := 0; i < len(vfcbrtSC); i++ {
if f := CbrtNovec(vfcbrtSC[i]); !alike(cbrtSC[i], f) {
t.Errorf("Cbrt(%g) = %g, want %g", vfcbrtSC[i], f, cbrtSC[i])
}
}
}
func TestLogNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := Abs(vf[i])
if f := LogNovec(a); log[i] != f {
t.Errorf("Log(%g) = %g, want %g", a, f, log[i])
}
}
if f := LogNovec(10); f != Ln10 {
t.Errorf("Log(%g) = %g, want %g", 10.0, f, Ln10)
}
for i := 0; i < len(vflogSC); i++ {
if f := LogNovec(vflogSC[i]); !alike(logSC[i], f) {
t.Errorf("Log(%g) = %g, want %g", vflogSC[i], f, logSC[i])
}
}
}
func TestExpNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
testExpNovec(t, Exp, "Exp")
testExpNovec(t, ExpGo, "ExpGo")
}
func testExpNovec(t *testing.T, Exp func(float64) float64, name string) {
for i := 0; i < len(vf); i++ {
if f := ExpNovec(vf[i]); !veryclose(exp[i], f) {
t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp[i])
}
}
for i := 0; i < len(vfexpSC); i++ {
if f := ExpNovec(vfexpSC[i]); !alike(expSC[i], f) {
t.Errorf("%s(%g) = %g, want %g", name, vfexpSC[i], f, expSC[i])
}
}
}
func TestExpm1Novec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
a := vf[i] / 100
if f := Expm1Novec(a); !veryclose(expm1[i], f) {
t.Errorf("Expm1(%g) = %g, want %g", a, f, expm1[i])
}
}
for i := 0; i < len(vf); i++ {
a := vf[i] * 10
if f := Expm1Novec(a); !close(expm1Large[i], f) {
t.Errorf("Expm1(%g) = %g, want %g", a, f, expm1Large[i])
}
}
for i := 0; i < len(vfexpm1SC); i++ {
if f := Expm1Novec(vfexpm1SC[i]); !alike(expm1SC[i], f) {
t.Errorf("Expm1(%g) = %g, want %g", vfexpm1SC[i], f, expm1SC[i])
}
}
}
func TestPowNovec(t *testing.T) {
if !HasVX {
t.Skipf("no vector support")
}
for i := 0; i < len(vf); i++ {
if f := PowNovec(10, vf[i]); !close(pow[i], f) {
t.Errorf("Pow(10, %g) = %g, want %g", vf[i], f, pow[i])
}
}
for i := 0; i < len(vfpowSC); i++ {
if f := PowNovec(vfpowSC[i][0], vfpowSC[i][1]); !alike(powSC[i], f) {
t.Errorf("Pow(%g, %g) = %g, want %g", vfpowSC[i][0], vfpowSC[i][1], f, powSC[i])
}
}
}
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·asinrodataL15<> + 0(SB)/8, $-1.309611320495605469
DATA ·asinrodataL15<> + 8(SB)/8, $0x3ff921fb54442d18
DATA ·asinrodataL15<> + 16(SB)/8, $0xbff921fb54442d18
DATA ·asinrodataL15<> + 24(SB)/8, $1.309611320495605469
DATA ·asinrodataL15<> + 32(SB)/8, $-0.0
DATA ·asinrodataL15<> + 40(SB)/8, $1.199437040755305217
DATA ·asinrodataL15<> + 48(SB)/8, $0.166666666666651626E+00
DATA ·asinrodataL15<> + 56(SB)/8, $0.750000000042621169E-01
DATA ·asinrodataL15<> + 64(SB)/8, $0.446428567178116477E-01
DATA ·asinrodataL15<> + 72(SB)/8, $0.303819660378071894E-01
DATA ·asinrodataL15<> + 80(SB)/8, $0.223715011892010405E-01
DATA ·asinrodataL15<> + 88(SB)/8, $0.173659424522364952E-01
DATA ·asinrodataL15<> + 96(SB)/8, $0.137810186504372266E-01
DATA ·asinrodataL15<> + 104(SB)/8, $0.134066870961173521E-01
DATA ·asinrodataL15<> + 112(SB)/8, $-.412335502831898721E-02
DATA ·asinrodataL15<> + 120(SB)/8, $0.867383739532082719E-01
DATA ·asinrodataL15<> + 128(SB)/8, $-.328765950607171649E+00
DATA ·asinrodataL15<> + 136(SB)/8, $0.110401073869414626E+01
DATA ·asinrodataL15<> + 144(SB)/8, $-.270694366992537307E+01
DATA ·asinrodataL15<> + 152(SB)/8, $0.500196500770928669E+01
DATA ·asinrodataL15<> + 160(SB)/8, $-.665866959108585165E+01
DATA ·asinrodataL15<> + 168(SB)/8, $-.344895269334086578E+01
DATA ·asinrodataL15<> + 176(SB)/8, $0.927437952918301659E+00
DATA ·asinrodataL15<> + 184(SB)/8, $0.610487478874645653E+01
DATA ·asinrodataL15<> + 192(SB)/8, $0x7ff8000000000000 //+Inf
DATA ·asinrodataL15<> + 200(SB)/8, $-1.0
DATA ·asinrodataL15<> + 208(SB)/8, $1.0
DATA ·asinrodataL15<> + 216(SB)/8, $1.00000000000000000e-20
GLOBL ·asinrodataL15<> + 0(SB), RODATA, $224
// Asin returns the arcsine, in radians, of the argument.
//
// Special cases are:
// Asin(±0) = ±0=
// Asin(x) = NaN if x < -1 or x > 1
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·asinAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD asinrodataL15<>+0(SB), R9
WORD $0xB3CD0070 //lgdr %r7, %f0
FMOVD F0, F8
SRAD $32, R7
WORD $0xC0193FE6 //iilf %r1,1072079005
BYTE $0xA0
BYTE $0x9D
WORD $0xB91700C7 //llgtr %r12,%r7
MOVW R12, R8
MOVW R1, R6
CMPBGT R8, R6, L2
WORD $0xC0193BFF //iilf %r1,1006632959
BYTE $0xFF
BYTE $0xFF
MOVW R1, R6
CMPBGT R8, R6, L13
L3:
FMOVD 216(R9), F0
FMADD F0, F8, F8
L1:
FMOVD F8, ret+8(FP)
RET
L2:
WORD $0xC0193FEF //iilf %r1,1072693247
BYTE $0xFF
BYTE $0xFF
CMPW R12, R1
BLE L14
L5:
WORD $0xED0090D0 //cdb %f0,.L17-.L15(%r9)
BYTE $0x00
BYTE $0x19
BEQ L9
WORD $0xED0090C8 //cdb %f0,.L18-.L15(%r9)
BYTE $0x00
BYTE $0x19
BEQ L10
WFCEDBS V8, V8, V0
BVS L1
FMOVD 192(R9), F8
BR L1
L13:
WFMDB V0, V0, V10
L4:
WFMDB V10, V10, V0
FMOVD 184(R9), F6
FMOVD 176(R9), F2
FMOVD 168(R9), F4
WFMADB V0, V2, V6, V2
FMOVD 160(R9), F6
WFMADB V0, V4, V6, V4
FMOVD 152(R9), F6
WFMADB V0, V2, V6, V2
FMOVD 144(R9), F6
WFMADB V0, V4, V6, V4
FMOVD 136(R9), F6
WFMADB V0, V2, V6, V2
WORD $0xC0193FE6 //iilf %r1,1072079005
BYTE $0xA0
BYTE $0x9D
FMOVD 128(R9), F6
WFMADB V0, V4, V6, V4
FMOVD 120(R9), F6
WFMADB V0, V2, V6, V2
FMOVD 112(R9), F6
WFMADB V0, V4, V6, V4
FMOVD 104(R9), F6
WFMADB V0, V2, V6, V2
FMOVD 96(R9), F6
WFMADB V0, V4, V6, V4
FMOVD 88(R9), F6
WFMADB V0, V2, V6, V2
FMOVD 80(R9), F6
WFMADB V0, V4, V6, V4
FMOVD 72(R9), F6
WFMADB V0, V2, V6, V2
FMOVD 64(R9), F6
WFMADB V0, V4, V6, V4
FMOVD 56(R9), F6
WFMADB V0, V2, V6, V2
FMOVD 48(R9), F6
WFMADB V0, V4, V6, V0
WFMDB V8, V10, V4
FMADD F2, F10, F0
FMADD F0, F4, F8
CMPW R12, R1
BLE L1
FMOVD 40(R9), F0
FMADD F0, F1, F8
FMOVD F8, ret+8(FP)
RET
L14:
FMOVD 200(R9), F0
FMADD F8, F8, F0
WORD $0xB31300A0 //lcdbr %f10,%f0
WORD $0xED009020 //cdb %f0,.L39-.L15(%r9)
BYTE $0x00
BYTE $0x19
FSQRT F10, F8
L6:
MOVW R7, R6
CMPBLE R6, $0, L8
WORD $0xB3130088 //lcdbr %f8,%f8
FMOVD 24(R9), F1
BR L4
L10:
FMOVD 16(R9), F8
BR L1
L9:
FMOVD 8(R9), F8
FMOVD F8, ret+8(FP)
RET
L8:
FMOVD 0(R9), F1
BR L4
......@@ -36,7 +36,9 @@ package math
// Asinh(±0) = ±0
// Asinh(±Inf) = ±Inf
// Asinh(NaN) = NaN
func Asinh(x float64) float64 {
func Asinh(x float64) float64
func asinh(x float64) float64 {
const (
Ln2 = 6.93147180559945286227e-01 // 0x3FE62E42FEFA39EF
NearZero = 1.0 / (1 << 28) // 2**-28
......
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·asinhrodataL18<> + 0(SB)/8, $0.749999999977387502E-01
DATA ·asinhrodataL18<> + 8(SB)/8, $-.166666666666657082E+00
DATA ·asinhrodataL18<> + 16(SB)/8, $0.303819368237360639E-01
DATA ·asinhrodataL18<> + 24(SB)/8, $-.446428569571752982E-01
DATA ·asinhrodataL18<> + 32(SB)/8, $0.173500047922695924E-01
DATA ·asinhrodataL18<> + 40(SB)/8, $-.223719767210027185E-01
DATA ·asinhrodataL18<> + 48(SB)/8, $0.113655037946822130E-01
DATA ·asinhrodataL18<> + 56(SB)/8, $0.579747490622448943E-02
DATA ·asinhrodataL18<> + 64(SB)/8, $-.139372433914359122E-01
DATA ·asinhrodataL18<> + 72(SB)/8, $-.218674325255800840E-02
DATA ·asinhrodataL18<> + 80(SB)/8, $-.891074277756961157E-02
DATA ·asinhrodataL18<> + 88(SB)/8, $.41375273347623353626
DATA ·asinhrodataL18<> + 96(SB)/8, $.51487302528619766235E+04
DATA ·asinhrodataL18<> + 104(SB)/8, $-1.67526912689208984375
DATA ·asinhrodataL18<> + 112(SB)/8, $0.181818181818181826E+00
DATA ·asinhrodataL18<> + 120(SB)/8, $-.165289256198351540E-01
DATA ·asinhrodataL18<> + 128(SB)/8, $0.200350613573012186E-02
DATA ·asinhrodataL18<> + 136(SB)/8, $-.273205381970859341E-03
DATA ·asinhrodataL18<> + 144(SB)/8, $0.397389654305194527E-04
DATA ·asinhrodataL18<> + 152(SB)/8, $0.938370938292558173E-06
DATA ·asinhrodataL18<> + 160(SB)/8, $0.212881813645679599E-07
DATA ·asinhrodataL18<> + 168(SB)/8, $-.602107458843052029E-05
DATA ·asinhrodataL18<> + 176(SB)/8, $-.148682720127920854E-06
DATA ·asinhrodataL18<> + 184(SB)/8, $-5.5
DATA ·asinhrodataL18<> + 192(SB)/8, $1.0
DATA ·asinhrodataL18<> + 200(SB)/8, $1.0E-20
GLOBL ·asinhrodataL18<> + 0(SB), RODATA, $208
// Table of log correction terms
DATA ·asinhtab2080<> + 0(SB)/8, $0.585235384085551248E-01
DATA ·asinhtab2080<> + 8(SB)/8, $0.412206153771168640E-01
DATA ·asinhtab2080<> + 16(SB)/8, $0.273839003221648339E-01
DATA ·asinhtab2080<> + 24(SB)/8, $0.166383778368856480E-01
DATA ·asinhtab2080<> + 32(SB)/8, $0.866678223433169637E-02
DATA ·asinhtab2080<> + 40(SB)/8, $0.319831684989627514E-02
DATA ·asinhtab2080<> + 48(SB)/8, $0.0
DATA ·asinhtab2080<> + 56(SB)/8, $-.113006378583725549E-02
DATA ·asinhtab2080<> + 64(SB)/8, $-.367979419636602491E-03
DATA ·asinhtab2080<> + 72(SB)/8, $0.213172484510484979E-02
DATA ·asinhtab2080<> + 80(SB)/8, $0.623271047682013536E-02
DATA ·asinhtab2080<> + 88(SB)/8, $0.118140812789696885E-01
DATA ·asinhtab2080<> + 96(SB)/8, $0.187681358930914206E-01
DATA ·asinhtab2080<> + 104(SB)/8, $0.269985148668178992E-01
DATA ·asinhtab2080<> + 112(SB)/8, $0.364186619761331328E-01
DATA ·asinhtab2080<> + 120(SB)/8, $0.469505379381388441E-01
GLOBL ·asinhtab2080<> + 0(SB), RODATA, $128
// Asinh returns the inverse hyperbolic sine of the argument.
//
// Special cases are:
// Asinh(±0) = ±0
// AsinhInf) = ±Inf
// Asinh(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·asinhAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD asinhrodataL18<>+0(SB), R9
WORD $0xB3CD00C0 //lgdr %r12, %f0
WORD $0xC0293FDF //iilf %r2,1071644671
BYTE $0xFF
BYTE $0xFF
SRAD $32, R12
WORD $0xB917001C //llgtr %r1,%r12
MOVW R1, R6
MOVW R2, R7
CMPBLE R6, R7, L2
WORD $0xC0295FEF //iilf %r2,1609564159
BYTE $0xFF
BYTE $0xFF
MOVW R2, R7
CMPBLE R6, R7, L14
L3:
WORD $0xC0297FEF //iilf %r2,2146435071
BYTE $0xFF
BYTE $0xFF
CMPW R1, R2
BGT L1
WORD $0xB3120000 //ltdbr %f0,%f0
FMOVD F0, F10
BLTU L15
L9:
FMOVD $0, F0
WFADB V0, V10, V0
WORD $0xC0398006 //iilf %r3,2147909631
BYTE $0x7F
BYTE $0xFF
WORD $0xB3CD0050 //lgdr %r5, %f0
SRAD $32, R5
MOVH $0x0, R2
SUBW R5, R3
FMOVD $0, F8
WORD $0xEC4320AF //risbg %r4,%r3,32,128+47,0
BYTE $0x00
BYTE $0x55
BYTE $0x18 //lr %r1,%r4
BYTE $0x14
WORD $0xEC24001F //risbgn %r2,%r4,64-64+0,64-64+0+32-1,64-0-32
BYTE $0x20
BYTE $0x59
SUBW $0x100000, R1
SRAW $8, R1, R1
ORW $0x45000000, R1
BR L6
L2:
MOVD $0x30000000, R2
CMPW R1, R2
BGT L16
FMOVD 200(R9), F2
FMADD F2, F0, F0
L1:
FMOVD F0, ret+8(FP)
RET
L14:
WORD $0xB3120000 //ltdbr %f0,%f0
BLTU L17
FMOVD F0, F10
L4:
FMOVD 192(R9), F2
WFMADB V0, V0, V2, V0
WORD $0xB3120000 //ltdbr %f0,%f0
FSQRT F0, F8
L5:
WFADB V8, V10, V0
WORD $0xC0398006 //iilf %r3,2147909631
BYTE $0x7F
BYTE $0xFF
WORD $0xB3CD0050 //lgdr %r5, %f0
SRAD $32, R5
MOVH $0x0, R2
SUBW R5, R3
WORD $0xEC4320AF //risbg %r4,%r3,32,128+47,0
BYTE $0x00
BYTE $0x55
SRAW $8, R4, R1
WORD $0xEC24001F //risbgn %r2,%r4,64-64+0,64-64+0+32-1,64-0-32
BYTE $0x20
BYTE $0x59
ORW $0x45000000, R1
L6:
WORD $0xB3C10022 //ldgr %f2,%r2
FMOVD 184(R9), F0
WFMADB V8, V2, V0, V8
FMOVD 176(R9), F4
WFMADB V10, V2, V8, V2
FMOVD 168(R9), F0
FMOVD 160(R9), F6
FMOVD 152(R9), F1
WFMADB V2, V6, V4, V6
WFMADB V2, V1, V0, V1
WFMDB V2, V2, V4
FMOVD 144(R9), F0
WFMADB V6, V4, V1, V6
FMOVD 136(R9), F1
WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,64-13
BYTE $0x33
BYTE $0x55
WFMADB V2, V0, V1, V0
FMOVD 128(R9), F1
WFMADB V4, V6, V0, V6
FMOVD 120(R9), F0
WFMADB V2, V1, V0, V1
VLVGF $0, R1, V0
WFMADB V4, V6, V1, V4
LDEBR F0, F0
FMOVD 112(R9), F6
WFMADB V2, V4, V6, V4
MOVD asinhtab2080<>+0(SB), R1
FMOVD 104(R9), F1
WORD $0x68331000 //ld %f3,0(%r3,%r1)
FMOVD 96(R9), F6
WFMADB V2, V4, V3, V2
WFMADB V0, V1, V6, V0
FMOVD 88(R9), F4
WFMADB V0, V4, V2, V0
MOVD R12, R6
CMPBGT R6, $0, L1
WORD $0xB3130000 //lcdbr %f0,%f0
FMOVD F0, ret+8(FP)
RET
L16:
WFMDB V0, V0, V1
FMOVD 80(R9), F6
WFMDB V1, V1, V4
FMOVD 72(R9), F2
WFMADB V4, V2, V6, V2
FMOVD 64(R9), F3
FMOVD 56(R9), F6
WFMADB V4, V2, V3, V2
FMOVD 48(R9), F3
WFMADB V4, V6, V3, V6
FMOVD 40(R9), F5
FMOVD 32(R9), F3
WFMADB V4, V2, V5, V2
WFMADB V4, V6, V3, V6
FMOVD 24(R9), F5
FMOVD 16(R9), F3
WFMADB V4, V2, V5, V2
WFMADB V4, V6, V3, V6
FMOVD 8(R9), F5
FMOVD 0(R9), F3
WFMADB V4, V2, V5, V2
WFMADB V4, V6, V3, V4
WFMDB V0, V1, V6
WFMADB V1, V4, V2, V4
FMADD F4, F6, F0
FMOVD F0, ret+8(FP)
RET
L17:
WORD $0xB31300A0 //lcdbr %f10,%f0
BR L4
L15:
WORD $0xB31300A0 //lcdbr %f10,%f0
BR L9
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32 arm
#include "textflag.h"
TEXT ·Acosh(SB),NOSPLIT,$0
JMP ·acosh(SB)
TEXT ·Asinh(SB),NOSPLIT,$0
JMP ·asinh(SB)
TEXT ·Atanh(SB),NOSPLIT,$0
JMP ·atanh(SB)
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
#define PosInf 0x7FF0000000000000
#define NegInf 0xFFF0000000000000
#define NegZero 0x8000000000000000
#define Pi 0x400921FB54442D18
#define NegPi 0xC00921FB54442D18
#define Pi3Div4 0x4002D97C7F3321D2 // 3Pi/4
#define NegPi3Div4 0xC002D97C7F3321D2 // -3Pi/4
#define PiDiv4 0x3FE921FB54442D18 // Pi/4
#define NegPiDiv4 0xBFE921FB54442D18 // -Pi/4
// Minimax polynomial coefficients and other constants
DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160
DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b
DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b
GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32
DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000
GLOBL ·atan2xpim<> + 0(SB), RODATA, $8
// Atan2 returns the arc tangent of y/x, using
// the signs of the two to determine the quadrant
// of the return value.
//
// Special cases are (in order):
// Atan2(y, NaN) = NaN
// Atan2(NaN, x) = NaN
// Atan2(+0, x>=0) = +0
// Atan2(-0, x>=0) = -0
// Atan2(+0, x<=-0) = +Pi
// Atan2(-0, x<=-0) = -Pi
// Atan2(y>0, 0) = +Pi/2
// Atan2(y<0, 0) = -Pi/2
// Atan2(+Inf, +Inf) = +Pi/4
// Atan2(-Inf, +Inf) = -Pi/4
// Atan2(+Inf, -Inf) = 3Pi/4
// Atan2(-Inf, -Inf) = -3Pi/4
// Atan2(y, +Inf) = 0
// Atan2(y>0, -Inf) = +Pi
// Atan2(y<0, -Inf) = -Pi
// Atan2(+Inf, x) = +Pi/2
// Atan2(-Inf, x) = -Pi/2
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·atan2Asm(SB), NOSPLIT, $0-24
// special case
MOVD x+0(FP), R1
MOVD y+8(FP), R2
// special case Atan2(NaN, y) = NaN
MOVD $~(1<<63), R5
AND R1, R5 // x = |x|
MOVD $PosInf, R3
CMPUBLT R3, R5, returnX
// special case Atan2(x, NaN) = NaN
MOVD $~(1<<63), R5
AND R2, R5
CMPUBLT R3, R5, returnY
MOVD $NegZero, R3
CMPUBEQ R3, R1, xIsNegZero
MOVD $0, R3
CMPUBEQ R3, R1, xIsPosZero
MOVD $PosInf, R4
CMPUBEQ R4, R2, yIsPosInf
MOVD $NegInf, R4
CMPUBEQ R4, R2, yIsNegInf
BR Normal
xIsNegZero:
// special case Atan(-0, y>=0) = -0
MOVD $0, R4
CMPBLE R4, R2, returnX
//special case Atan2(-0, y<=-0) = -Pi
MOVD $NegZero, R4
CMPBGE R4, R2, returnNegPi
BR Normal
xIsPosZero:
//special case Atan2(0, 0) = 0
MOVD $0, R4
CMPUBEQ R4, R2, returnX
//special case Atan2(0, y<=-0) = Pi
MOVD $NegZero, R4
CMPBGE R4, R2, returnPi
BR Normal
yIsNegInf:
//special case Atan2(+Inf, -Inf) = 3Pi/4
MOVD $PosInf, R3
CMPUBEQ R3, R1, posInfNegInf
//special case Atan2(-Inf, -Inf) = -3Pi/4
MOVD $NegInf, R3
CMPUBEQ R3, R1, negInfNegInf
BR Normal
yIsPosInf:
//special case Atan2(+Inf, +Inf) = Pi/4
MOVD $PosInf, R3
CMPUBEQ R3, R1, posInfPosInf
//special case Atan2(-Inf, +Inf) = -Pi/4
MOVD $NegInf, R3
CMPUBEQ R3, R1, negInfPosInf
//special case Atan2(-Pi, +Inf) = Pi
MOVD $NegPi, R3
CMPUBEQ R3, R1, negPiPosInf
Normal:
FMOVD x+0(FP), F0
FMOVD y+8(FP), F2
MOVD atan2rodataL25<>+0(SB), R9
WORD $0xB3CD0020 //lgdr %r2,%f0
WORD $0xB3CD0012 //lgdr %r1,%f2
WORD $0xEC2220BF //risbgn %r2,%r2,64-32,128+63,64+0+32
BYTE $0x60
BYTE $0x59
WORD $0xEC1120BF //risbgn %r1,%r1,64-32,128+63,64+0+32
BYTE $0x60
BYTE $0x59
WORD $0xB9170032 //llgtr %r3,%r2
WORD $0xEC523FBF //risbg %r5,%r2,64-1,128+63,64+32+1
BYTE $0x61
BYTE $0x55
WORD $0xB9170041 //llgtr %r4,%r1
WFLCDB V0, V20
MOVW R4, R6
MOVW R3, R7
CMPUBLT R6, R7, L17
WFDDB V2, V0, V3
ADDW $2, R5, R2
MOVW R4, R6
MOVW R3, R7
CMPUBLE R6, R7, L20
L3:
WFMDB V3, V3, V4
VLEG $0, 152(R9), V18
VLEG $0, 144(R9), V16
FMOVD 136(R9), F1
FMOVD 128(R9), F5
FMOVD 120(R9), F6
WFMADB V4, V16, V5, V16
WFMADB V4, V6, V1, V6
FMOVD 112(R9), F7
WFMDB V4, V4, V1
WFMADB V4, V7, V18, V7
VLEG $0, 104(R9), V18
WFMADB V1, V6, V16, V6
CMPWU R4, R3
FMOVD 96(R9), F5
VLEG $0, 88(R9), V16
WFMADB V4, V5, V18, V5
VLEG $0, 80(R9), V18
VLEG $0, 72(R9), V22
WFMADB V4, V16, V18, V16
VLEG $0, 64(R9), V18
WFMADB V1, V7, V5, V7
WFMADB V4, V18, V22, V18
WFMDB V1, V1, V5
WFMADB V1, V16, V18, V16
VLEG $0, 56(R9), V18
WFMADB V5, V6, V7, V6
VLEG $0, 48(R9), V22
FMOVD 40(R9), F7
WFMADB V4, V7, V18, V7
VLEG $0, 32(R9), V18
WFMADB V5, V6, V16, V6
WFMADB V4, V18, V22, V18
VLEG $0, 24(R9), V16
WFMADB V1, V7, V18, V7
VLEG $0, 16(R9), V18
VLEG $0, 8(R9), V22
WFMADB V4, V18, V16, V18
VLEG $0, 0(R9), V16
WFMADB V5, V6, V7, V6
WFMADB V4, V16, V22, V16
FMUL F3, F4
WFMADB V1, V18, V16, V1
FMADD F6, F5, F1
WFMADB V4, V1, V3, V4
BLT L18
BGT L7
WORD $0xB3120022 //ltdbr %f2,%f2
BLTU L21
L8:
WORD $0xB3120000 //ltdbr %f0,%f0
BLTU L22
L9:
WFCHDBS V2, V0, V0
BNE L18
L7:
MOVW R1, R6
CMPBGE R6, $0, L1
L18:
WORD $0xEC223ABC //risbg %r2,%r2,58,128+60,3
BYTE $0x03
BYTE $0x55
MOVD atan2xpi2h<>+0(SB), R1
MOVD ·atan2xpim<>+0(SB), R3
WORD $0xB3C10003 //ldgr %f0,%r3
WORD $0xED021000 //madb %f4,%f0,0(%r2,%r1)
BYTE $0x40
BYTE $0x1E
L1:
FMOVD F4, ret+16(FP)
RET
L20:
WORD $0xB3120022 //ltdbr %f2,%f2
BLTU L23
FMOVD F2, F6
L4:
WORD $0xB3120000 //ltdbr %f0,%f0
BLTU L24
FMOVD F0, F4
L5:
WFCHDBS V6, V4, V4
BEQ L3
L17:
WFDDB V0, V2, V4
BYTE $0x18 //lr %r2,%r5
BYTE $0x25
WORD $0xB3130034 //lcdbr %f3,%f4
BR L3
L23:
WORD $0xB3130062 //lcdbr %f6,%f2
BR L4
L22:
VLR V20, V0
BR L9
L21:
WORD $0xB3130022 //lcdbr %f2,%f2
BR L8
L24:
VLR V20, V4
BR L5
returnX: //the result is same as the first argument
MOVD R1, ret+16(FP)
RET
returnY: //the result is same as the second argument
MOVD R2, ret+16(FP)
RET
returnPi:
MOVD $Pi, R1
MOVD R1, ret+16(FP)
RET
returnNegPi:
MOVD $NegPi, R1
MOVD R1, ret+16(FP)
RET
posInfNegInf:
MOVD $Pi3Div4, R1
MOVD R1, ret+16(FP)
RET
negInfNegInf:
MOVD $NegPi3Div4, R1
MOVD R1, ret+16(FP)
RET
posInfPosInf:
MOVD $PiDiv4, R1
MOVD R1, ret+16(FP)
RET
negInfPosInf:
MOVD $NegPiDiv4, R1
MOVD R1, ret+16(FP)
RET
negPiPosInf:
MOVD $NegZero, R1
MOVD R1, ret+16(FP)
RET
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·atanrodataL8<> + 0(SB)/8, $0.199999999999554423E+00
DATA ·atanrodataL8<> + 8(SB)/8, $0.111111110136634272E+00
DATA ·atanrodataL8<> + 16(SB)/8, $-.142857142828026806E+00
DATA ·atanrodataL8<> + 24(SB)/8, $-.333333333333330928E+00
DATA ·atanrodataL8<> + 32(SB)/8, $0.769228118888682505E-01
DATA ·atanrodataL8<> + 40(SB)/8, $0.588059263575587687E-01
DATA ·atanrodataL8<> + 48(SB)/8, $-.666641501287528609E-01
DATA ·atanrodataL8<> + 56(SB)/8, $-.909090711945939878E-01
DATA ·atanrodataL8<> + 64(SB)/8, $0.472329433805024762E-01
DATA ·atanrodataL8<> + 72(SB)/8, $0.366935664549587481E-01
DATA ·atanrodataL8<> + 80(SB)/8, $-.422172007412067035E-01
DATA ·atanrodataL8<> + 88(SB)/8, $-.299856214685512712E-01
DATA ·atanrodataL8<> + 96(SB)/8, $0.220852012160300086E-01
DATA ·atanrodataL8<> + 104(SB)/8, $0.726338160757602439E-02
DATA ·atanrodataL8<> + 112(SB)/8, $0.843488472994227321E-03
DATA ·atanrodataL8<> + 120(SB)/8, $0.134893651284712515E-04
DATA ·atanrodataL8<> + 128(SB)/8, $-.525380587584426406E-01
DATA ·atanrodataL8<> + 136(SB)/8, $-.139950258898989925E-01
DATA ·atanrodataL8<> + 144(SB)/8, $-.291935324869629616E-02
DATA ·atanrodataL8<> + 152(SB)/8, $-.154797890856877418E-03
GLOBL ·atanrodataL8<> + 0(SB), RODATA, $160
DATA ·atanxpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b
DATA ·atanxpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
DATA ·atanxpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
DATA ·atanxpi2h<> + 24(SB)/4, $0xc00330e4e4fa7b1b
GLOBL ·atanxpi2h<> + 0(SB), RODATA, $32
DATA ·atanxpim<> + 0(SB)/8, $0x3ff4f42b00000000
GLOBL ·atanxpim<> + 0(SB), RODATA, $8
DATA ·atanxmone<> + 0(SB)/8, $-1.0
GLOBL ·atanxmone<> + 0(SB), RODATA, $8
// Atan returns the arctangent, in radians, of the argument.
//
// Special cases are:
// Atan(±0) = ±0
// AtanInf) = ±Pi/2Pi
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·atanAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
//special case Atan(±0) = ±0
FMOVD $(0.0), F1
FCMPU F0, F1
BEQ atanIsZero
MOVD atanrodataL8<>+0(SB), R5
MOVH $0x3FE0, R3
WORD $0xB3CD0010 //lgdr %r1,%f0
WORD $0xEC1120BF //risbgn %r1,%r1,64-32,128+63,64+0+32
BYTE $0x60
BYTE $0x59
RLL $16, R1, R2
ANDW $0x7FF0, R2
MOVW R2, R6
MOVW R3, R7
CMPUBLE R6, R7, L6
MOVD atanxmone<>+0(SB), R3
FMOVD 0(R3), F2
WFDDB V0, V2, V0
WORD $0xEC113FBF //risbg %r1,%r1,64-1,128+63,64+32+1
BYTE $0x61
BYTE $0x55
MOVD atanxpi2h<>+0(SB), R3
MOVWZ R1, R1
SLD $3, R1, R1
WORD $0x68813000 //ld %f8,0(%r1,%r3)
L6:
WFMDB V0, V0, V2
FMOVD 152(R5), F6
FMOVD 144(R5), F1
FMOVD 136(R5), F7
VLEG $0, 128(R5), V16
FMOVD 120(R5), F4
FMOVD 112(R5), F5
WFMADB V2, V4, V6, V4
WFMADB V2, V5, V1, V5
WFMDB V2, V2, V6
FMOVD 104(R5), F3
FMOVD 96(R5), F1
WFMADB V2, V3, V7, V3
MOVH $0x3FE0, R1
FMOVD 88(R5), F7
WFMADB V2, V1, V7, V1
FMOVD 80(R5), F7
WFMADB V6, V3, V1, V3
WFMADB V6, V4, V5, V4
WFMDB V6, V6, V1
FMOVD 72(R5), F5
WFMADB V2, V5, V7, V5
FMOVD 64(R5), F7
WFMADB V2, V7, V16, V7
VLEG $0, 56(R5), V16
WFMADB V6, V5, V7, V5
WFMADB V1, V4, V3, V4
FMOVD 48(R5), F7
FMOVD 40(R5), F3
WFMADB V2, V3, V7, V3
FMOVD 32(R5), F7
WFMADB V2, V7, V16, V7
VLEG $0, 24(R5), V16
WFMADB V1, V4, V5, V4
FMOVD 16(R5), F5
WFMADB V6, V3, V7, V3
FMOVD 8(R5), F7
WFMADB V2, V7, V5, V7
FMOVD 0(R5), F5
WFMADB V2, V5, V16, V5
WFMADB V1, V4, V3, V4
WFMADB V6, V7, V5, V6
FMUL F0, F2
FMADD F4, F1, F6
FMADD F6, F2, F0
MOVW R2, R6
MOVW R1, R7
CMPUBLE R6, R7, L1
MOVD atanxpim<>+0(SB), R1
WORD $0xED801000 //madb %f0,%f8,0(%r1)
BYTE $0x00
BYTE $0x1E
L1:
atanIsZero:
FMOVD F0, ret+8(FP)
RET
......@@ -44,7 +44,9 @@ package math
// Atanh(-1) = -Inf
// Atanh(x) = NaN if x < -1 or x > 1
// Atanh(NaN) = NaN
func Atanh(x float64) float64 {
func Atanh(x float64) float64
func atanh(x float64) float64 {
const NearZero = 1.0 / (1 << 28) // 2**-28
// special cases
switch {
......
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·atanhrodataL10<> + 0(SB)/8, $.41375273347623353626
DATA ·atanhrodataL10<> + 8(SB)/8, $.51487302528619766235E+04
DATA ·atanhrodataL10<> + 16(SB)/8, $-1.67526912689208984375
DATA ·atanhrodataL10<> + 24(SB)/8, $0.181818181818181826E+00
DATA ·atanhrodataL10<> + 32(SB)/8, $-.165289256198351540E-01
DATA ·atanhrodataL10<> + 40(SB)/8, $0.200350613573012186E-02
DATA ·atanhrodataL10<> + 48(SB)/8, $0.397389654305194527E-04
DATA ·atanhrodataL10<> + 56(SB)/8, $-.273205381970859341E-03
DATA ·atanhrodataL10<> + 64(SB)/8, $0.938370938292558173E-06
DATA ·atanhrodataL10<> + 72(SB)/8, $-.148682720127920854E-06
DATA ·atanhrodataL10<> + 80(SB)/8, $ 0.212881813645679599E-07
DATA ·atanhrodataL10<> + 88(SB)/8, $-.602107458843052029E-05
DATA ·atanhrodataL10<> + 96(SB)/8, $-5.5
DATA ·atanhrodataL10<> + 104(SB)/8, $-0.5
DATA ·atanhrodataL10<> + 112(SB)/8, $0.0
DATA ·atanhrodataL10<> + 120(SB)/8, $0x7ff8000000000000 //Nan
DATA ·atanhrodataL10<> + 128(SB)/8, $-1.0
DATA ·atanhrodataL10<> + 136(SB)/8, $1.0
DATA ·atanhrodataL10<> + 144(SB)/8, $1.0E-20
GLOBL ·atanhrodataL10<> + 0(SB), RODATA, $152
// Table of log correction terms
DATA ·atanhtab2076<> + 0(SB)/8, $0.585235384085551248E-01
DATA ·atanhtab2076<> + 8(SB)/8, $0.412206153771168640E-01
DATA ·atanhtab2076<> + 16(SB)/8, $0.273839003221648339E-01
DATA ·atanhtab2076<> + 24(SB)/8, $0.166383778368856480E-01
DATA ·atanhtab2076<> + 32(SB)/8, $0.866678223433169637E-02
DATA ·atanhtab2076<> + 40(SB)/8, $0.319831684989627514E-02
DATA ·atanhtab2076<> + 48(SB)/8, $0.000000000000000000E+00
DATA ·atanhtab2076<> + 56(SB)/8, $-.113006378583725549E-02
DATA ·atanhtab2076<> + 64(SB)/8, $-.367979419636602491E-03
DATA ·atanhtab2076<> + 72(SB)/8, $0.213172484510484979E-02
DATA ·atanhtab2076<> + 80(SB)/8, $0.623271047682013536E-02
DATA ·atanhtab2076<> + 88(SB)/8, $0.118140812789696885E-01
DATA ·atanhtab2076<> + 96(SB)/8, $0.187681358930914206E-01
DATA ·atanhtab2076<> + 104(SB)/8, $0.269985148668178992E-01
DATA ·atanhtab2076<> + 112(SB)/8, $0.364186619761331328E-01
DATA ·atanhtab2076<> + 120(SB)/8, $0.469505379381388441E-01
GLOBL ·atanhtab2076<> + 0(SB), RODATA, $128
// Table of +/- .5
DATA ·atanhtabh2075<> + 0(SB)/8, $0.5
DATA ·atanhtabh2075<> + 8(SB)/8, $-.5
GLOBL ·atanhtabh2075<> + 0(SB), RODATA, $16
// Atanh returns the inverse hyperbolic tangent of the argument.
//
// Special cases are:
// Atanh(1) = +Inf
// Atanh(±0) = ±0
// Atanh(-1) = -Inf
// Atanh(x) = NaN if x < -1 or x > 1
// Atanh(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·atanhAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD atanhrodataL10<>+0(SB), R5
WORD $0xB3CD0010 //lgdr %r1, %f0
WORD $0xC0393FEF //iilf %r3,1072693247
BYTE $0xFF
BYTE $0xFF
SRAD $32, R1
WORD $0xB9170021 //llgtr %r2,%r1
MOVW R2, R6
MOVW R3, R7
CMPBGT R6, R7, L2
WORD $0xC0392FFF //iilf %r3,805306367
BYTE $0xFF
BYTE $0xFF
MOVW R2, R6
MOVW R3, R7
CMPBGT R6, R7, L9
L3:
FMOVD 144(R5), F2
FMADD F2, F0, F0
L1:
FMOVD F0, ret+8(FP)
RET
L2:
WORD $0xED005088 //cdb %f0,.L12-.L10(%r5)
BYTE $0x00
BYTE $0x19
BEQ L5
WORD $0xED005080 //cdb %f0,.L13-.L10(%r5)
BYTE $0x00
BYTE $0x19
BEQ L5
WFCEDBS V0, V0, V2
BVS L1
FMOVD 120(R5), F0
BR L1
L5:
WORD $0xED005070 //ddb %f0,.L15-.L10(%r5)
BYTE $0x00
BYTE $0x1D
FMOVD F0, ret+8(FP)
RET
L9:
FMOVD F0, F2
MOVD atanhtabh2075<>+0(SB), R2
SRW $31, R1, R1
FMOVD 104(R5), F4
MOVW R1, R1
SLD $3, R1, R1
WORD $0x68012000 //ld %f0,0(%r1,%r2)
WFMADB V2, V4, V0, V4
VLEG $0, 96(R5), V16
FDIV F4, F2
WORD $0xC0298006 //iilf %r2,2147909631
BYTE $0x7F
BYTE $0xFF
FMOVD 88(R5), F6
FMOVD 80(R5), F1
FMOVD 72(R5), F7
FMOVD 64(R5), F5
FMOVD F2, F4
WORD $0xED405088 //adb %f4,.L12-.L10(%r5)
BYTE $0x00
BYTE $0x1A
WORD $0xB3CD0044 //lgdr %r4, %f4
SRAD $32, R4
FMOVD F4, F3
WORD $0xED305088 //sdb %f3,.L12-.L10(%r5)
BYTE $0x00
BYTE $0x1B
SUBW R4, R2
WFSDB V3, V2, V3
WORD $0xEC1220AF //risbg %r1,%r2,32,128+47,0
BYTE $0x00
BYTE $0x55
SLD $32, R1, R1
WORD $0xB3C10021 //ldgr %f2,%r1
WFMADB V4, V2, V16, V4
SRAW $8, R2, R1
WFMADB V4, V5, V6, V5
WFMDB V4, V4, V6
WFMADB V4, V1, V7, V1
WFMADB V2, V3, V4, V2
WFMADB V1, V6, V5, V1
FMOVD 56(R5), F3
FMOVD 48(R5), F5
WFMADB V4, V5, V3, V4
FMOVD 40(R5), F3
FMADD F1, F6, F4
FMOVD 32(R5), F1
FMADD F3, F2, F1
ANDW $0xFFFFFF00, R1
WFMADB V6, V4, V1, V6
FMOVD 24(R5), F3
ORW $0x45000000, R1
WFMADB V2, V6, V3, V6
VLVGF $0, R1, V4
LDEBR F4, F4
WORD $0xEC2239BC //risbg %r2,%r2,57,128+60,64-13
BYTE $0x33
BYTE $0x55
MOVD atanhtab2076<>+0(SB), R1
FMOVD 16(R5), F3
WORD $0x68521000 //ld %f5,0(%r2,%r1)
FMOVD 8(R5), F1
WFMADB V2, V6, V5, V2
WFMADB V4, V3, V1, V4
FMOVD 0(R5), F6
FMADD F6, F4, F2
FMUL F2, F0
FMOVD F0, ret+8(FP)
RET
......@@ -22,7 +22,9 @@ package math
// Cbrt(±0) = ±0
// Cbrt(±Inf) = ±Inf
// Cbrt(NaN) = NaN
func Cbrt(x float64) float64 {
func Cbrt(x float64) float64
func cbrt(x float64) float64 {
const (
B1 = 715094163 // (682-0.03306235651)*2**20
B2 = 696219795 // (664-0.03306235651)*2**20
......
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625
DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00
DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.
GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72
// Index tables
DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80
DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128
DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128
// Cbrt returns the cube root of the argument.
//
// Special cases are:
// Cbrt(±0) = ±0
// CbrtInf) = ±Inf
// Cbrt(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·cbrtAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD cbrtrodataL9<>+0(SB), R9
WORD $0xB3CD0020 //lgdr %r2, %f0
WORD $0xC039000F //iilf %r3,1048575
BYTE $0xFF
BYTE $0xFF
SRAD $32, R2
WORD $0xB9170012 //llgtr %r1,%r2
MOVW R1, R6
MOVW R3, R7
CMPBLE R6, R7, L2
WORD $0xC0397FEF //iilf %r3,2146435071
BYTE $0xFF
BYTE $0xFF
MOVW R3, R7
CMPBLE R6, R7, L8
L1:
FMOVD F0, ret+8(FP)
RET
L3:
L2:
WORD $0xB3120000 //ltdbr %f0,%f0
BEQ L1
FMOVD F0, F2
WORD $0xED209040 //mdb %f2,.L10-.L9(%r9)
BYTE $0x00
BYTE $0x1C
MOVH $0x200, R4
WORD $0xB3CD0022 //lgdr %r2, %f2
SRAD $32, R2
L4:
WORD $0xEC3239BE //risbg %r3,%r2,57,128+62,64-25
BYTE $0x27
BYTE $0x55
MOVD cbrttab12067<>+0(SB), R1
WORD $0x48131000 //lh %r1,0(%r3,%r1)
WORD $0xEC3239BE //risbg %r3,%r2,57,128+62,64-19
BYTE $0x2D
BYTE $0x55
MOVD cbrttab22068<>+0(SB), R5
WORD $0xEC223CBF //risbgn %r2,%r2,64-4,128+63,64+44+4
BYTE $0x70
BYTE $0x59
WORD $0x4A135000 //ah %r1,0(%r3,%r5)
BYTE $0x18 //lr %r3,%r1
BYTE $0x31
MOVD cbrttab32069<>+0(SB), R1
FMOVD 56(R9), F1
FMOVD 48(R9), F5
WORD $0xEC23393B //rosbg %r2,%r3,57,59,4
BYTE $0x04
BYTE $0x56
WORD $0xE3121000 //llc %r1,0(%r2,%r1)
BYTE $0x00
BYTE $0x94
ADDW R3, R1
ADDW R4, R1
SLW $16, R1, R1
SLD $32, R1, R1
WORD $0xB3C10021 //ldgr %f2,%r1
WFMDB V2, V2, V4
WFMDB V4, V0, V6
WFMSDB V4, V6, V2, V4
FMOVD 40(R9), F6
FMSUB F1, F4, F2
FMOVD 32(R9), F4
WFMDB V2, V2, V3
FMOVD 24(R9), F1
FMUL F3, F0
FMOVD 16(R9), F3
WFMADB V2, V0, V5, V2
FMOVD 8(R9), F5
FMADD F6, F2, F4
WFMADB V2, V1, V3, V1
WFMDB V2, V2, V6
FMOVD 0(R9), F3
WFMADB V4, V6, V1, V4
WFMADB V2, V5, V3, V2
FMADD F4, F6, F2
FMADD F2, F0, F0
FMOVD F0, ret+8(FP)
RET
L8:
MOVH $0x0, R4
BR L4
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32 arm
#include "textflag.h"
TEXT ·Cbrt(SB),NOSPLIT,$0
JMP ·cbrt(SB)
......@@ -185,7 +185,9 @@ const (
// Erf(+Inf) = 1
// Erf(-Inf) = -1
// Erf(NaN) = NaN
func Erf(x float64) float64 {
func Erf(x float64) float64
func erf(x float64) float64 {
const (
VeryTiny = 2.848094538889218e-306 // 0x0080000000000000
Small = 1.0 / (1 << 28) // 2**-28
......@@ -262,7 +264,9 @@ func Erf(x float64) float64 {
// Erfc(+Inf) = 0
// Erfc(-Inf) = 2
// Erfc(NaN) = NaN
func Erfc(x float64) float64 {
func Erfc(x float64) float64
func erfc(x float64) float64 {
const Tiny = 1.0 / (1 << 56) // 2**-56
// special cases
switch {
......
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·erfrodataL13<> + 0(SB)/8, $0.243673229298474689E+01
DATA ·erfrodataL13<> + 8(SB)/8, $-.654905018503145600E+00
DATA ·erfrodataL13<> + 16(SB)/8, $0.404669310217538718E+01
DATA ·erfrodataL13<> + 24(SB)/8, $-.564189219162765367E+00
DATA ·erfrodataL13<> + 32(SB)/8, $-.200104300906596851E+01
DATA ·erfrodataL13<> + 40(SB)/8, $0.5
DATA ·erfrodataL13<> + 48(SB)/8, $0.144070097650207154E+00
DATA ·erfrodataL13<> + 56(SB)/8, $-.116697735205906191E+00
DATA ·erfrodataL13<> + 64(SB)/8, $0.256847684882319665E-01
DATA ·erfrodataL13<> + 72(SB)/8, $-.510805169106229148E-02
DATA ·erfrodataL13<> + 80(SB)/8, $0.885258164825590267E-03
DATA ·erfrodataL13<> + 88(SB)/8, $-.133861989591931411E-03
DATA ·erfrodataL13<> + 96(SB)/8, $0.178294867340272534E-04
DATA ·erfrodataL13<> + 104(SB)/8, $-.211436095674019218E-05
DATA ·erfrodataL13<> + 112(SB)/8, $0.225503753499344434E-06
DATA ·erfrodataL13<> + 120(SB)/8, $-.218247939190783624E-07
DATA ·erfrodataL13<> + 128(SB)/8, $0.193179206264594029E-08
DATA ·erfrodataL13<> + 136(SB)/8, $-.157440643541715319E-09
DATA ·erfrodataL13<> + 144(SB)/8, $0.118878583237342616E-10
DATA ·erfrodataL13<> + 152(SB)/8, $0.554289288424588473E-13
DATA ·erfrodataL13<> + 160(SB)/8, $-.277649758489502214E-14
DATA ·erfrodataL13<> + 168(SB)/8, $-.839318416990049443E-12
DATA ·erfrodataL13<> + 176(SB)/8, $-2.25
DATA ·erfrodataL13<> + 184(SB)/8, $.12837916709551258632
DATA ·erfrodataL13<> + 192(SB)/8, $1.0
DATA ·erfrodataL13<> + 200(SB)/8, $0.500000000000004237e+00
DATA ·erfrodataL13<> + 208(SB)/8, $1.0
DATA ·erfrodataL13<> + 216(SB)/8, $0.416666664838056960e-01
DATA ·erfrodataL13<> + 224(SB)/8, $0.166666666630345592e+00
DATA ·erfrodataL13<> + 232(SB)/8, $0.138926439368309441e-02
DATA ·erfrodataL13<> + 240(SB)/8, $0.833349307718286047e-02
DATA ·erfrodataL13<> + 248(SB)/8, $-.693147180559945286e+00
DATA ·erfrodataL13<> + 256(SB)/8, $-.144269504088896339e+01
DATA ·erfrodataL13<> + 264(SB)/8, $281475245147134.9375
DATA ·erfrodataL13<> + 272(SB)/8, $0.358256136398192529E+01
DATA ·erfrodataL13<> + 280(SB)/8, $-.554084396500738270E+00
DATA ·erfrodataL13<> + 288(SB)/8, $0.203630123025312046E+02
DATA ·erfrodataL13<> + 296(SB)/8, $-.735750304705934424E+01
DATA ·erfrodataL13<> + 304(SB)/8, $0.250491598091071797E+02
DATA ·erfrodataL13<> + 312(SB)/8, $-.118955882760959931E+02
DATA ·erfrodataL13<> + 320(SB)/8, $0.942903335085524187E+01
DATA ·erfrodataL13<> + 328(SB)/8, $-.564189522219085689E+00
DATA ·erfrodataL13<> + 336(SB)/8, $-.503767199403555540E+01
DATA ·erfrodataL13<> + 344(SB)/8, $0xbbc79ca10c924223
DATA ·erfrodataL13<> + 352(SB)/8, $0.004099975562609307E+01
DATA ·erfrodataL13<> + 360(SB)/8, $-.324434353381296556E+00
DATA ·erfrodataL13<> + 368(SB)/8, $0.945204812084476250E-01
DATA ·erfrodataL13<> + 376(SB)/8, $-.221407443830058214E-01
DATA ·erfrodataL13<> + 384(SB)/8, $0.426072376238804349E-02
DATA ·erfrodataL13<> + 392(SB)/8, $-.692229229127016977E-03
DATA ·erfrodataL13<> + 400(SB)/8, $0.971111253652087188E-04
DATA ·erfrodataL13<> + 408(SB)/8, $-.119752226272050504E-04
DATA ·erfrodataL13<> + 416(SB)/8, $0.131662993588532278E-05
DATA ·erfrodataL13<> + 424(SB)/8, $0.115776482315851236E-07
DATA ·erfrodataL13<> + 432(SB)/8, $-.780118522218151687E-09
DATA ·erfrodataL13<> + 440(SB)/8, $-.130465975877241088E-06
DATA ·erfrodataL13<> + 448(SB)/8, $-0.25
GLOBL ·erfrodataL13<> + 0(SB), RODATA, $456
// Table of log correction terms
DATA ·erftab2066<> + 0(SB)/8, $0.442737824274138381e-01
DATA ·erftab2066<> + 8(SB)/8, $0.263602189790660309e-01
DATA ·erftab2066<> + 16(SB)/8, $0.122565642281703586e-01
DATA ·erftab2066<> + 24(SB)/8, $0.143757052860721398e-02
DATA ·erftab2066<> + 32(SB)/8, $-.651375034121276075e-02
DATA ·erftab2066<> + 40(SB)/8, $-.119317678849450159e-01
DATA ·erftab2066<> + 48(SB)/8, $-.150868749549871069e-01
DATA ·erftab2066<> + 56(SB)/8, $-.161992609578469234e-01
DATA ·erftab2066<> + 64(SB)/8, $-.154492360403337917e-01
DATA ·erftab2066<> + 72(SB)/8, $-.129850717389178721e-01
DATA ·erftab2066<> + 80(SB)/8, $-.892902649276657891e-02
DATA ·erftab2066<> + 88(SB)/8, $-.338202636596794887e-02
DATA ·erftab2066<> + 96(SB)/8, $0.357266307045684762e-02
DATA ·erftab2066<> + 104(SB)/8, $0.118665304327406698e-01
DATA ·erftab2066<> + 112(SB)/8, $0.214434994118118914e-01
DATA ·erftab2066<> + 120(SB)/8, $0.322580645161290314e-01
GLOBL ·erftab2066<> + 0(SB), RODATA, $128
// Table of +/- 1.0
DATA ·erftab12067<> + 0(SB)/8, $1.0
DATA ·erftab12067<> + 8(SB)/8, $-1.0
GLOBL ·erftab12067<> + 0(SB), RODATA, $16
// Erf returns the error function of the argument.
//
// Special cases are:
// Erf(+Inf) = 1
// Erf(-Inf) = -1
// Erf(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·erfAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD erfrodataL13<>+0(SB), R5
WORD $0xB3CD0010 //lgdr %r1, %f0
FMOVD F0, F6
SRAD $48, R1
MOVH $16383, R3
WORD $0xEC2131BF //risbg %r2,%r1,49,128+63,0
BYTE $0x00
BYTE $0x55
MOVW R2, R6
MOVW R3, R7
CMPBGT R6, R7, L2
MOVH $12287, R1
MOVW R1, R7
CMPBLE R6, R7 ,L12
MOVH $16367, R1
MOVW R1, R7
CMPBGT R6, R7, L5
FMOVD 448(R5), F4
FMADD F0, F0, F4
FMOVD 440(R5), F3
WFMDB V4, V4, V2
FMOVD 432(R5), F0
FMOVD 424(R5), F1
WFMADB V2, V0, V3, V0
FMOVD 416(R5), F3
WFMADB V2, V1, V3, V1
FMOVD 408(R5), F5
FMOVD 400(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V1, V3, V1
FMOVD 392(R5), F5
FMOVD 384(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V1, V3, V1
FMOVD 376(R5), F5
FMOVD 368(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V1, V3, V1
FMOVD 360(R5), F5
FMOVD 352(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V1, V3, V2
WFMADB V4, V0, V2, V0
WFMADB V6, V0, V6, V0
L1:
FMOVD F0, ret+8(FP)
RET
L2:
MOVH R1, R1
MOVH $16407, R3
SRW $31, R1, R1
MOVW R2, R6
MOVW R3, R7
CMPBLE R6, R7, L6
MOVW R1, R1
SLD $3, R1, R1
MOVD erftab12067<>+0(SB), R3
WORD $0x68013000 //ld %f0,0(%r1,%r3)
MOVH $32751, R1
MOVW R1, R7
CMPBGT R6, R7, L7
FMOVD 344(R5), F2
FMADD F2, F0, F0
L7:
WFCEDBS V6, V6, V2
BEQ L1
FMOVD F6, F0
FMOVD F0, ret+8(FP)
RET
L6:
MOVW R1, R1
SLD $3, R1, R1
MOVD erftab12067<>+0(SB), R4
WFMDB V0, V0, V1
MOVH $0x0, R3
WORD $0x68014000 //ld %f0,0(%r1,%r4)
MOVH $16399, R1
MOVW R2, R6
MOVW R1, R7
CMPBGT R6, R7, L8
FMOVD 336(R5), F3
FMOVD 328(R5), F2
FMOVD F1, F4
WFMADB V1, V2, V3, V2
WORD $0xED405140 //adb %f4,.L30-.L13(%r5)
BYTE $0x00
BYTE $0x1A
FMOVD 312(R5), F3
WFMADB V1, V2, V3, V2
FMOVD 304(R5), F3
WFMADB V1, V4, V3, V4
FMOVD 296(R5), F3
WFMADB V1, V2, V3, V2
FMOVD 288(R5), F3
WFMADB V1, V4, V3, V4
FMOVD 280(R5), F3
WFMADB V1, V2, V3, V2
FMOVD 272(R5), F3
WFMADB V1, V4, V3, V4
L9:
FMOVD 264(R5), F3
FMUL F4, F6
FMOVD 256(R5), F4
WFMADB V1, V4, V3, V4
FDIV F6, F2
WORD $0xB3CD0014 //lgdr %r1, %f4
FSUB F3, F4
FMOVD 248(R5), F6
WFMSDB V4, V6, V1, V4
FMOVD 240(R5), F1
FMOVD 232(R5), F6
WFMADB V4, V6, V1, V6
FMOVD 224(R5), F1
FMOVD 216(R5), F3
WFMADB V4, V3, V1, V3
WFMDB V4, V4, V1
FMOVD 208(R5), F5
WFMADB V6, V1, V3, V6
FMOVD 200(R5), F3
MOVH R1,R1
WFMADB V4, V3, V5, V3
WORD $0xEC2139BC //risbg %r2,%r1,57,128+60,3
BYTE $0x03
BYTE $0x55
WFMADB V1, V6, V3, V6
WORD $0xEC31000F //risbgn %r3,%r1,64-64+0,64-64+0+16-1,64-0-16
BYTE $0x30
BYTE $0x59
MOVD erftab2066<>+0(SB), R1
FMOVD 192(R5), F1
WORD $0xB3C10033 //ldgr %f3,%r3
WORD $0xED221000 //madb %f2,%f2,0(%r2,%r1)
BYTE $0x20
BYTE $0x1E
WFMADB V4, V6, V1, V4
FMUL F3, F2
FMADD F4, F2, F0
FMOVD F0, ret+8(FP)
RET
L12:
FMOVD 184(R5), F0
WFMADB V6, V0, V6, V0
FMOVD F0, ret+8(FP)
RET
L5:
FMOVD 176(R5), F1
FMADD F0, F0, F1
FMOVD 168(R5), F3
WFMDB V1, V1, V2
FMOVD 160(R5), F0
FMOVD 152(R5), F4
WFMADB V2, V0, V3, V0
FMOVD 144(R5), F3
WFMADB V2, V4, V3, V4
FMOVD 136(R5), F5
FMOVD 128(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V4, V3, V4
FMOVD 120(R5), F5
FMOVD 112(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V4, V3, V4
FMOVD 104(R5), F5
FMOVD 96(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V4, V3, V4
FMOVD 88(R5), F5
FMOVD 80(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V4, V3, V4
FMOVD 72(R5), F5
FMOVD 64(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V4, V3, V4
FMOVD 56(R5), F5
FMOVD 48(R5), F3
WFMADB V2, V0, V5, V0
WFMADB V2, V4, V3, V2
FMOVD 40(R5), F4
WFMADB V1, V0, V2, V0
FMUL F6, F0
FMADD F4, F6, F0
FMOVD F0, ret+8(FP)
RET
L8:
FMOVD 32(R5), F3
FMOVD 24(R5), F2
FMOVD F1, F4
WFMADB V1, V2, V3, V2
WORD $0xED405010 //adb %f4,.L68-.L13(%r5)
BYTE $0x00
BYTE $0x1A
FMOVD 8(R5), F3
WFMADB V1, V2, V3, V2
FMOVD ·erfrodataL13<>+0(SB), F3
WFMADB V1, V4, V3, V4
BR L9
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32 arm
#include "textflag.h"
TEXT ·Erf(SB),NOSPLIT,$0
JMP ·erf(SB)
TEXT ·Erfc(SB),NOSPLIT,$0
JMP ·erfc(SB)
This diff is collapsed.
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial approximation and other constants
DATA ·exprodataL22<> + 0(SB)/8, $800.0E+00
DATA ·exprodataL22<> + 8(SB)/8, $1.0000000000000022e+00
DATA ·exprodataL22<> + 16(SB)/8, $0.500000000000004237e+00
DATA ·exprodataL22<> + 24(SB)/8, $0.166666666630345592e+00
DATA ·exprodataL22<> + 32(SB)/8, $0.138926439368309441e-02
DATA ·exprodataL22<> + 40(SB)/8, $0.833349307718286047e-02
DATA ·exprodataL22<> + 48(SB)/8, $0.416666664838056960e-01
DATA ·exprodataL22<> + 56(SB)/8, $-.231904681384629956E-16
DATA ·exprodataL22<> + 64(SB)/8, $-.693147180559945286E+00
DATA ·exprodataL22<> + 72(SB)/8, $0.144269504088896339E+01
DATA ·exprodataL22<> + 80(SB)/8, $704.0E+00
GLOBL ·exprodataL22<> + 0(SB), RODATA, $88
DATA ·expxinf<> + 0(SB)/8, $0x7ff0000000000000
GLOBL ·expxinf<> + 0(SB), RODATA, $8
DATA ·expx4ff<> + 0(SB)/8, $0x4ff0000000000000
GLOBL ·expx4ff<> + 0(SB), RODATA, $8
DATA ·expx2ff<> + 0(SB)/8, $0x2ff0000000000000
GLOBL ·expx2ff<> + 0(SB), RODATA, $8
DATA ·expxaddexp<> + 0(SB)/8, $0xc2f0000100003fef
GLOBL ·expxaddexp<> + 0(SB), RODATA, $8
// Log multipliers table
DATA ·exptexp<> + 0(SB)/8, $0.442737824274138381E-01
DATA ·exptexp<> + 8(SB)/8, $0.263602189790660309E-01
DATA ·exptexp<> + 16(SB)/8, $0.122565642281703586E-01
DATA ·exptexp<> + 24(SB)/8, $0.143757052860721398E-02
DATA ·exptexp<> + 32(SB)/8, $-.651375034121276075E-02
DATA ·exptexp<> + 40(SB)/8, $-.119317678849450159E-01
DATA ·exptexp<> + 48(SB)/8, $-.150868749549871069E-01
DATA ·exptexp<> + 56(SB)/8, $-.161992609578469234E-01
DATA ·exptexp<> + 64(SB)/8, $-.154492360403337917E-01
DATA ·exptexp<> + 72(SB)/8, $-.129850717389178721E-01
DATA ·exptexp<> + 80(SB)/8, $-.892902649276657891E-02
DATA ·exptexp<> + 88(SB)/8, $-.338202636596794887E-02
DATA ·exptexp<> + 96(SB)/8, $0.357266307045684762E-02
DATA ·exptexp<> + 104(SB)/8, $0.118665304327406698E-01
DATA ·exptexp<> + 112(SB)/8, $0.214434994118118914E-01
DATA ·exptexp<> + 120(SB)/8, $0.322580645161290314E-01
GLOBL ·exptexp<> + 0(SB), RODATA, $128
// Exp returns e**x, the base-e exponential of x.
//
// Special cases are:
// Exp(+Inf) = +Inf
// Exp(NaN) = NaN
// Very large values overflow to 0 or +Inf.
// Very small values underflow to 1.
// The algorithm used is minimax polynomial approximation using a table of
// polynomial coefficients determined with a Remez exchange algorithm.
TEXT ·expAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD exprodataL22<>+0(SB), R5
WORD $0xB3120000 //ltdbr %f0,%f0
BLTU L20
FMOVD F0, F2
L2:
WORD $0xED205050 //cdb %f2,.L23-.L22(%r5)
BYTE $0x00
BYTE $0x19
BGE L16
BVS L16
WFCEDBS V2, V2, V2
BVS LEXITTAGexp
MOVD expxaddexp<>+0(SB), R1
FMOVD 72(R5), F6
FMOVD 0(R1), F2
WFMSDB V0, V6, V2, V6
FMOVD 64(R5), F4
FADD F6, F2
FMOVD 56(R5), F1
FMADD F4, F2, F0
FMOVD 48(R5), F3
WFMADB V2, V1, V0, V2
FMOVD 40(R5), F1
FMOVD 32(R5), F4
FMUL F0, F0
WFMADB V2, V4, V1, V4
WORD $0xB3CD0016 //lgdr %r1,%f6
FMOVD 24(R5), F1
WFMADB V2, V3, V1, V3
FMOVD 16(R5), F1
WFMADB V0, V4, V3, V4
FMOVD 8(R5), F3
WFMADB V2, V1, V3, V1
WORD $0xEC3139BC //risbg %r3,%r1,57,128+60,3
BYTE $0x03
BYTE $0x55
WFMADB V0, V4, V1, V0
MOVD exptexp<>+0(SB), R2
WORD $0x68432000 //ld %f4,0(%r3,%r2)
FMADD F4, F2, F2
SLD $48, R1, R2
WFMADB V2, V0, V4, V2
WORD $0xB3C10002 //ldgr %f0,%r2
FMADD F0, F2, F0
FMOVD F0, ret+8(FP)
RET
L16:
WFCEDBS V2, V2, V4
BVS LEXITTAGexp
WORD $0xED205000 //cdb %f2,.L33-.L22(%r5)
BYTE $0x00
BYTE $0x19
BLT L6
WFCEDBS V2, V0, V0
BVS L13
MOVD expxinf<>+0(SB), R1
FMOVD 0(R1), F0
FMOVD F0, ret+8(FP)
RET
L20:
WORD $0xB3130020 //lcdbr %f2,%f0
BR L2
L6:
MOVD expxaddexp<>+0(SB), R1
FMOVD 72(R5), F3
FMOVD 0(R1), F4
WFMSDB V0, V3, V4, V3
FMOVD 64(R5), F6
FADD F3, F4
FMOVD 56(R5), F5
WFMADB V4, V6, V0, V6
FMOVD 32(R5), F1
WFMADB V4, V5, V6, V4
FMOVD 40(R5), F5
FMUL F6, F6
WFMADB V4, V1, V5, V1
FMOVD 48(R5), F7
WORD $0xB3CD0013 //lgdr %r1,%f3
FMOVD 24(R5), F5
WFMADB V4, V7, V5, V7
FMOVD 16(R5), F5
WFMADB V6, V1, V7, V1
FMOVD 8(R5), F7
WFMADB V4, V5, V7, V5
WORD $0xEC3139BC //risbg %r3,%r1,57,128+60,3
BYTE $0x03
BYTE $0x55
WFMADB V6, V1, V5, V6
MOVD exptexp<>+0(SB), R2
WFCHDBS V2, V0, V0
WORD $0x68132000 //ld %f1,0(%r3,%r2)
FMADD F1, F4, F4
MOVD $0x4086000000000000, R2
WFMADB V4, V6, V1, V4
BEQ L21
ADDW $0xF000, R1
WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
BYTE $0x30
BYTE $0x59
WORD $0xB3C10002 //ldgr %f0,%r2
FMADD F0, F4, F0
MOVD expx4ff<>+0(SB), R3
FMOVD 0(R3), F2
FMUL F2, F0
FMOVD F0, ret+8(FP)
RET
L13:
FMOVD $0, F0
FMOVD F0, ret+8(FP)
RET
L21:
ADDW $0x1000, R1
WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
BYTE $0x30
BYTE $0x59
WORD $0xB3C10002 //ldgr %f0,%r2
FMADD F0, F4, F0
MOVD expx2ff<>+0(SB), R3
FMOVD 0(R3), F2
FMUL F2, F0
FMOVD F0, ret+8(FP)
RET
LEXITTAGexp:
FMOVD F0, ret+8(FP)
RET
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial approximation and other constants
DATA ·expm1rodataL22<> + 0(SB)/8, $-1.0
DATA ·expm1rodataL22<> + 8(SB)/8, $800.0E+00
DATA ·expm1rodataL22<> + 16(SB)/8, $1.0
DATA ·expm1rodataL22<> + 24(SB)/8, $-.231904681384629956E-16
DATA ·expm1rodataL22<> + 32(SB)/8, $0.50000000000000029671E+00
DATA ·expm1rodataL22<> + 40(SB)/8, $0.16666666666666676570E+00
DATA ·expm1rodataL22<> + 48(SB)/8, $0.83333333323590973444E-02
DATA ·expm1rodataL22<> + 56(SB)/8, $0.13889096526400683566E-02
DATA ·expm1rodataL22<> + 64(SB)/8, $0.41666666661701152924E-01
DATA ·expm1rodataL22<> + 72(SB)/8, $0.19841562053987360264E-03
DATA ·expm1rodataL22<> + 80(SB)/8, $-.693147180559945286E+00
DATA ·expm1rodataL22<> + 88(SB)/8, $0.144269504088896339E+01
DATA ·expm1rodataL22<> + 96(SB)/8, $704.0E+00
GLOBL ·expm1rodataL22<> + 0(SB), RODATA, $104
DATA ·expm1xmone<> + 0(SB)/8, $0xbff0000000000000
GLOBL ·expm1xmone<> + 0(SB), RODATA, $8
DATA ·expm1xinf<> + 0(SB)/8, $0x7ff0000000000000
GLOBL ·expm1xinf<> + 0(SB), RODATA, $8
DATA ·expm1x4ff<> + 0(SB)/8, $0x4ff0000000000000
GLOBL ·expm1x4ff<> + 0(SB), RODATA, $8
DATA ·expm1x2ff<> + 0(SB)/8, $0x2ff0000000000000
GLOBL ·expm1x2ff<> + 0(SB), RODATA, $8
DATA ·expm1xaddexp<> + 0(SB)/8, $0xc2f0000100003ff0
GLOBL ·expm1xaddexp<> + 0(SB), RODATA, $8
// Log multipliers table
DATA ·expm1tab<> + 0(SB)/8, $0.0
DATA ·expm1tab<> + 8(SB)/8, $-.171540871271399150E-01
DATA ·expm1tab<> + 16(SB)/8, $-.306597931864376363E-01
DATA ·expm1tab<> + 24(SB)/8, $-.410200970469965021E-01
DATA ·expm1tab<> + 32(SB)/8, $-.486343079978231466E-01
DATA ·expm1tab<> + 40(SB)/8, $-.538226193725835820E-01
DATA ·expm1tab<> + 48(SB)/8, $-.568439602538111520E-01
DATA ·expm1tab<> + 56(SB)/8, $-.579091847395528847E-01
DATA ·expm1tab<> + 64(SB)/8, $-.571909584179366341E-01
DATA ·expm1tab<> + 72(SB)/8, $-.548312665987204407E-01
DATA ·expm1tab<> + 80(SB)/8, $-.509471843643441085E-01
DATA ·expm1tab<> + 88(SB)/8, $-.456353588448863359E-01
DATA ·expm1tab<> + 96(SB)/8, $-.389755254243262365E-01
DATA ·expm1tab<> + 104(SB)/8, $-.310332908285244231E-01
DATA ·expm1tab<> + 112(SB)/8, $-.218623539150173528E-01
DATA ·expm1tab<> + 120(SB)/8, $-.115062908917949451E-01
GLOBL ·expm1tab<> + 0(SB), RODATA, $128
// Expm1 returns e**x - 1, the base-e exponential of x minus 1.
// It is more accurate than Exp(x) - 1 when x is near zero.
//
// Special cases are:
// Expm1(+Inf) = +Inf
// Expm1(-Inf) = -1
// Expm1(NaN) = NaN
// Very large values overflow to -1 or +Inf.
// The algorithm used is minimax polynomial approximation using a table of
// polynomial coefficients determined with a Remez exchange algorithm.
TEXT ·expm1Asm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD expm1rodataL22<>+0(SB), R5
WORD $0xB3120000 //ltdbr %f0,%f0
BLTU L20
FMOVD F0, F2
L2:
WORD $0xED205060 //cdb %f2,.L23-.L22(%r5)
BYTE $0x00
BYTE $0x19
BGE L16
BVS L16
WFCEDBS V2, V2, V2
BVS LEXITTAGexpm1
MOVD expm1xaddexp<>+0(SB), R1
FMOVD 88(R5), F1
FMOVD 0(R1), F2
WFMSDB V0, V1, V2, V1
FMOVD 80(R5), F6
WFADB V1, V2, V4
FMOVD 72(R5), F2
FMADD F6, F4, F0
FMOVD 64(R5), F3
FMOVD 56(R5), F6
FMOVD 48(R5), F5
FMADD F2, F0, F6
WFMADB V0, V5, V3, V5
WFMDB V0, V0, V2
WORD $0xB3CD0011 //lgdr %r1,%f1
WFMADB V6, V2, V5, V6
FMOVD 40(R5), F3
FMOVD 32(R5), F5
WFMADB V0, V3, V5, V3
FMOVD 24(R5), F5
WFMADB V2, V6, V3, V2
FMADD F5, F4, F0
FMOVD 16(R5), F6
WFMADB V0, V2, V6, V2
WORD $0xEC3139BC //risbg %r3,%r1,57,128+60,3
BYTE $0x03
BYTE $0x55
WORD $0xB3130022 //lcdbr %f2,%f2
MOVD expm1tab<>+0(SB), R2
WORD $0x68432000 //ld %f4,0(%r3,%r2)
FMADD F4, F0, F0
SLD $48, R1, R2
WFMSDB V2, V0, V4, V0
WORD $0xB3C10042 //ldgr %f4,%r2
WORD $0xB3130000 //lcdbr %f0,%f0
FSUB F4, F6
WFMSDB V0, V4, V6, V0
FMOVD F0, ret+8(FP)
RET
L16:
WFCEDBS V2, V2, V4
BVS LEXITTAGexpm1
WORD $0xED205008 //cdb %f2,.L34-.L22(%r5)
BYTE $0x00
BYTE $0x19
BLT L6
WFCEDBS V2, V0, V0
BVS L7
MOVD expm1xinf<>+0(SB), R1
FMOVD 0(R1), F0
FMOVD F0, ret+8(FP)
RET
L20:
WORD $0xB3130020 //lcdbr %f2,%f0
BR L2
L6:
MOVD expm1xaddexp<>+0(SB), R1
FMOVD 88(R5), F5
FMOVD 0(R1), F4
WFMSDB V0, V5, V4, V5
FMOVD 80(R5), F3
WFADB V5, V4, V1
VLEG $0, 48(R5), V16
WFMADB V1, V3, V0, V3
FMOVD 56(R5), F4
FMOVD 64(R5), F7
FMOVD 72(R5), F6
WFMADB V3, V16, V7, V16
WFMADB V3, V6, V4, V6
WFMDB V3, V3, V4
MOVD expm1tab<>+0(SB), R2
WFMADB V6, V4, V16, V6
VLEG $0, 32(R5), V16
FMOVD 40(R5), F7
WFMADB V3, V7, V16, V7
VLEG $0, 24(R5), V16
WFMADB V4, V6, V7, V4
WFMADB V1, V16, V3, V1
FMOVD 16(R5), F6
FMADD F4, F1, F6
WORD $0xB3CD0015 //lgdr %r1,%f5
WORD $0xB3130066 //lcdbr %f6,%f6
WORD $0xEC3139BC //risbg %r3,%r1,57,128+60,3
BYTE $0x03
BYTE $0x55
WORD $0x68432000 //ld %f4,0(%r3,%r2)
FMADD F4, F1, F1
MOVD $0x4086000000000000, R2
FMSUB F1, F6, F4
WORD $0xB3130044 //lcdbr %f4,%f4
WFCHDBS V2, V0, V0
BEQ L21
ADDW $0xF000, R1
WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
BYTE $0x30
BYTE $0x59
WORD $0xB3C10002 //ldgr %f0,%r2
FMADD F0, F4, F0
MOVD expm1x4ff<>+0(SB), R3
FMOVD 0(R5), F4
FMOVD 0(R3), F2
WFMADB V2, V0, V4, V0
FMOVD F0, ret+8(FP)
RET
L7:
MOVD expm1xmone<>+0(SB), R1
FMOVD 0(R1), F0
FMOVD F0, ret+8(FP)
RET
L21:
ADDW $0x1000, R1
WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
BYTE $0x30
BYTE $0x59
WORD $0xB3C10002 //ldgr %f0,%r2
FMADD F0, F4, F0
MOVD expm1x2ff<>+0(SB), R3
FMOVD 0(R5), F4
FMOVD 0(R3), F2
WFMADB V2, V0, V4, V0
FMOVD F0, ret+8(FP)
RET
LEXITTAGexpm1:
FMOVD F0, ret+8(FP)
RET
......@@ -11,4 +11,21 @@ var CoshNoVec = cosh
var SinNoVec = sin
var SinhNoVec = sinh
var TanhNoVec = tanh
var Log1pNovec = log1p
var AtanhNovec = atanh
var AcosNovec = acos
var AcoshNovec = acosh
var AsinNovec = asin
var AsinhNovec = asinh
var ErfNovec = erf
var ErfcNovec = erfc
var AtanNovec = atan
var Atan2Novec = atan2
var CbrtNovec = cbrt
var LogNovec = log
var TanNovec = tan
var ExpNovec = exp
var Expm1Novec = expm1
var PowNovec = pow
var HypotNovec = hypot
var HasVX = hasVX
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Constants
DATA ·log1pxlim<> + 0(SB)/4, $0xfff00000
GLOBL ·log1pxlim<> + 0(SB), RODATA, $4
DATA ·log1pxzero<> + 0(SB)/8, $0.0
GLOBL ·log1pxzero<> + 0(SB), RODATA, $8
DATA ·log1pxminf<> + 0(SB)/8, $0xfff0000000000000
GLOBL ·log1pxminf<> + 0(SB), RODATA, $8
DATA ·log1pxnan<> + 0(SB)/8, $0x7ff8000000000000
GLOBL ·log1pxnan<> + 0(SB), RODATA, $8
DATA ·log1pyout<> + 0(SB)/8, $0x40fce621e71da000
GLOBL ·log1pyout<> + 0(SB), RODATA, $8
DATA ·log1pxout<> + 0(SB)/8, $0x40f1000000000000
GLOBL ·log1pxout<> + 0(SB), RODATA, $8
DATA ·log1pxl2<> + 0(SB)/8, $0xbfda7aecbeba4e46
GLOBL ·log1pxl2<> + 0(SB), RODATA, $8
DATA ·log1pxl1<> + 0(SB)/8, $0x3ffacde700000000
GLOBL ·log1pxl1<> + 0(SB), RODATA, $8
DATA ·log1pxa<> + 0(SB)/8, $5.5
GLOBL ·log1pxa<> + 0(SB), RODATA, $8
DATA ·log1pxmone<> + 0(SB)/8, $-1.0
GLOBL ·log1pxmone<> + 0(SB), RODATA, $8
// Minimax polynomial approximations
DATA ·log1pc8<> + 0(SB)/8, $0.212881813645679599E-07
GLOBL ·log1pc8<> + 0(SB), RODATA, $8
DATA ·log1pc7<> + 0(SB)/8, $-.148682720127920854E-06
GLOBL ·log1pc7<> + 0(SB), RODATA, $8
DATA ·log1pc6<> + 0(SB)/8, $0.938370938292558173E-06
GLOBL ·log1pc6<> + 0(SB), RODATA, $8
DATA ·log1pc5<> + 0(SB)/8, $-.602107458843052029E-05
GLOBL ·log1pc5<> + 0(SB), RODATA, $8
DATA ·log1pc4<> + 0(SB)/8, $0.397389654305194527E-04
GLOBL ·log1pc4<> + 0(SB), RODATA, $8
DATA ·log1pc3<> + 0(SB)/8, $-.273205381970859341E-03
GLOBL ·log1pc3<> + 0(SB), RODATA, $8
DATA ·log1pc2<> + 0(SB)/8, $0.200350613573012186E-02
GLOBL ·log1pc2<> + 0(SB), RODATA, $8
DATA ·log1pc1<> + 0(SB)/8, $-.165289256198351540E-01
GLOBL ·log1pc1<> + 0(SB), RODATA, $8
DATA ·log1pc0<> + 0(SB)/8, $0.181818181818181826E+00
GLOBL ·log1pc0<> + 0(SB), RODATA, $8
// Table of log10 correction terms
DATA ·log1ptab<> + 0(SB)/8, $0.585235384085551248E-01
DATA ·log1ptab<> + 8(SB)/8, $0.412206153771168640E-01
DATA ·log1ptab<> + 16(SB)/8, $0.273839003221648339E-01
DATA ·log1ptab<> + 24(SB)/8, $0.166383778368856480E-01
DATA ·log1ptab<> + 32(SB)/8, $0.866678223433169637E-02
DATA ·log1ptab<> + 40(SB)/8, $0.319831684989627514E-02
DATA ·log1ptab<> + 48(SB)/8, $-.000000000000000000E+00
DATA ·log1ptab<> + 56(SB)/8, $-.113006378583725549E-02
DATA ·log1ptab<> + 64(SB)/8, $-.367979419636602491E-03
DATA ·log1ptab<> + 72(SB)/8, $0.213172484510484979E-02
DATA ·log1ptab<> + 80(SB)/8, $0.623271047682013536E-02
DATA ·log1ptab<> + 88(SB)/8, $0.118140812789696885E-01
DATA ·log1ptab<> + 96(SB)/8, $0.187681358930914206E-01
DATA ·log1ptab<> + 104(SB)/8, $0.269985148668178992E-01
DATA ·log1ptab<> + 112(SB)/8, $0.364186619761331328E-01
DATA ·log1ptab<> + 120(SB)/8, $0.469505379381388441E-01
GLOBL ·log1ptab<> + 0(SB), RODATA, $128
// Log1p returns the natural logarithm of 1 plus its argument x.
// It is more accurate than Log(1 + x) when x is near zero.
//
// Special cases are:
// Log1p(+Inf) = +Inf
// Log1p(±0) = ±0
// Log1p(-1) = -Inf
// Log1p(x < -1) = NaN
// Log1p(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·log1pAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD log1pxmone<>+0(SB), R1
MOVD ·log1pxout<>+0(SB), R2
FMOVD 0(R1), F3
MOVD log1pxa<>+0(SB), R1
MOVWZ ·log1pxlim<>+0(SB), R0
FMOVD 0(R1), F1
MOVD log1pc8<>+0(SB), R1
FMOVD 0(R1), F5
MOVD log1pc7<>+0(SB), R1
VLEG $0, 0(R1), V20
MOVD log1pc6<>+0(SB), R1
WFSDB V0, V3, V4
VLEG $0, 0(R1), V18
MOVD log1pc5<>+0(SB), R1
VLEG $0, 0(R1), V16
MOVD R2, R5
WORD $0xB3CD0034 //lgdr %r3,%f4
WORD $0xC0190006 //iilf %r1,425983
BYTE $0x7F
BYTE $0xFF
SRAD $32, R3, R3
SUBW R3, R1
SRW $16, R1, R1
BYTE $0x18 //lr %r4,%r1
BYTE $0x41
WORD $0xEC24000F //risbgn %r2,%r4,64-64+0,64-64+0+16-1,64-0-16
BYTE $0x30
BYTE $0x59
WORD $0xEC54101F //risbgn %r5,%r4,64-64+16,64-64+16+16-1,64-16-16
BYTE $0x20
BYTE $0x59
MOVW R0, R6
MOVW R3, R7
CMPBGT R6, R7, L8
WFCEDBS V4, V4, V6
MOVD log1pxzero<>+0(SB), R1
FMOVD 0(R1), F2
BVS LEXITTAGlog1p
WORD $0xB3130044
WFCEDBS V2, V4, V6
BEQ L9
WFCHDBS V4, V2, V2
BEQ LEXITTAGlog1p
MOVD log1pxnan<>+0(SB), R1
FMOVD 0(R1), F0
FMOVD F0, ret+8(FP)
RET
L8:
WORD $0xB3C10022 //ldgr %f2,%r2
FSUB F4, F3
FMADD F2, F4, F1
MOVD log1pc4<>+0(SB), R2
WORD $0xB3130041
FMOVD 0(R2), F7
FSUB F3, F0
MOVD log1pc3<>+0(SB), R2
FMOVD 0(R2), F3
MOVD log1pc2<>+0(SB), R2
WFMDB V1, V1, V6
FMADD F7, F4, F3
WFMSDB V0, V2, V1, V0
FMOVD 0(R2), F7
WFMADB V4, V5, V20, V5
MOVD log1pc1<>+0(SB), R2
FMOVD 0(R2), F2
FMADD F7, F4, F2
WFMADB V4, V18, V16, V4
FMADD F3, F6, F2
WFMADB V5, V6, V4, V5
FMUL F6, F6
MOVD log1pc0<>+0(SB), R2
WFMADB V6, V5, V2, V6
FMOVD 0(R2), F4
WFMADB V0, V6, V4, V6
WORD $0xEC1139BC //risbg %r1,%r1,57,128+60,3
BYTE $0x03
BYTE $0x55
MOVD log1ptab<>+0(SB), R2
MOVD log1pxl1<>+0(SB), R3
WORD $0x68112000 //ld %f1,0(%r1,%r2)
FMOVD 0(R3), F2
WFMADB V0, V6, V1, V0
MOVD log1pyout<>+0(SB), R1
WORD $0xB3C10065 //ldgr %f6,%r5
FMOVD 0(R1), F4
WFMSDB V2, V6, V4, V2
MOVD log1pxl2<>+0(SB), R1
FMOVD 0(R1), F4
FMADD F4, F2, F0
FMOVD F0, ret+8(FP)
RET
L9:
MOVD log1pxminf<>+0(SB), R1
FMOVD 0(R1), F0
FMOVD F0, ret+8(FP)
RET
LEXITTAGlog1p:
FMOVD F0, ret+8(FP)
RET
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial approximations
DATA ·logrodataL21<> + 0(SB)/8, $-.499999999999999778E+00
DATA ·logrodataL21<> + 8(SB)/8, $0.333333333333343751E+00
DATA ·logrodataL21<> + 16(SB)/8, $-.250000000001606881E+00
DATA ·logrodataL21<> + 24(SB)/8, $0.199999999971603032E+00
DATA ·logrodataL21<> + 32(SB)/8, $-.166666663114122038E+00
DATA ·logrodataL21<> + 40(SB)/8, $-.125002923782692399E+00
DATA ·logrodataL21<> + 48(SB)/8, $0.111142014580396256E+00
DATA ·logrodataL21<> + 56(SB)/8, $0.759438932618934220E-01
DATA ·logrodataL21<> + 64(SB)/8, $0.142857144267212549E+00
DATA ·logrodataL21<> + 72(SB)/8, $-.993038938793590759E-01
DATA ·logrodataL21<> + 80(SB)/8, $-1.0
GLOBL ·logrodataL21<> + 0(SB), RODATA, $88
// Constants
DATA ·logxminf<> + 0(SB)/8, $0xfff0000000000000
GLOBL ·logxminf<> + 0(SB), RODATA, $8
DATA ·logxnan<> + 0(SB)/8, $0x7ff8000000000000
GLOBL ·logxnan<> + 0(SB), RODATA, $8
DATA ·logx43f<> + 0(SB)/8, $0x43f0000000000000
GLOBL ·logx43f<> + 0(SB), RODATA, $8
DATA ·logxl2<> + 0(SB)/8, $0x3fda7aecbeba4e46
GLOBL ·logxl2<> + 0(SB), RODATA, $8
DATA ·logxl1<> + 0(SB)/8, $0x3ffacde700000000
GLOBL ·logxl1<> + 0(SB), RODATA, $8
/* Input transform scale and add constants */
DATA ·logxm<> + 0(SB)/8, $0x3fc77604e63c84b1
DATA ·logxm<> + 8(SB)/8, $0x40fb39456ab53250
DATA ·logxm<> + 16(SB)/8, $0x3fc9ee358b945f3f
DATA ·logxm<> + 24(SB)/8, $0x40fb39418bf3b137
DATA ·logxm<> + 32(SB)/8, $0x3fccfb2e1304f4b6
DATA ·logxm<> + 40(SB)/8, $0x40fb393d3eda3022
DATA ·logxm<> + 48(SB)/8, $0x3fd0000000000000
DATA ·logxm<> + 56(SB)/8, $0x40fb393969e70000
DATA ·logxm<> + 64(SB)/8, $0x3fd11117aafbfe04
DATA ·logxm<> + 72(SB)/8, $0x40fb3936eaefafcf
DATA ·logxm<> + 80(SB)/8, $0x3fd2492af5e658b2
DATA ·logxm<> + 88(SB)/8, $0x40fb39343ff01715
DATA ·logxm<> + 96(SB)/8, $0x3fd3b50c622a43dd
DATA ·logxm<> + 104(SB)/8, $0x40fb39315adae2f3
DATA ·logxm<> + 112(SB)/8, $0x3fd56bbeea918777
DATA ·logxm<> + 120(SB)/8, $0x40fb392e21698552
GLOBL ·logxm<> + 0(SB), RODATA, $128
// Log returns the natural logarithm of the argument.
//
// Special cases are:
// Log(+Inf) = +Inf
// Log(0) = -Inf
// Log(x < 0) = NaN
// Log(NaN) = NaN
// The algorithm used is minimax polynomial approximation using a table of
// polynomial coefficients determined with a Remez exchange algorithm.
TEXT ·logAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD logrodataL21<>+0(SB), R9
MOVH $0x8006, R4
WORD $0xB3CD0010 //lgdr %r1,%f0
MOVD $0x3FF0000000000000, R6
SRAD $48, R1, R1
MOVD $0x40F03E8000000000, R8
SUBW R1, R4
WORD $0xEC2420BB //risbg %r2,%r4,32,128+59,0
BYTE $0x00
BYTE $0x55
WORD $0xEC62000F //risbgn %r6,%r2,64-64+0,64-64+0+16-1,64-0-16
BYTE $0x30
BYTE $0x59
WORD $0xEC82101F //risbgn %r8,%r2,64-64+16,64-64+16+16-1,64-16-16
BYTE $0x20
BYTE $0x59
MOVW R1, R7
CMPBGT R7, $22, L17
WORD $0xB3120000 //ltdbr %f0,%f0
MOVD logx43f<>+0(SB), R1
FMOVD 0(R1), F2
BLEU L3
MOVH $0x8005, R12
MOVH $0x8405, R0
BR L15
L7:
WORD $0xB3120000 //ltdbr %f0,%f0
BLEU L3
L15:
FMUL F2, F0
WORD $0xB3CD0010 //lgdr %r1,%f0
SRAD $48, R1, R1
SUBW R1, R0, R2
SUBW R1, R12, R3
BYTE $0x18 //lr %r4,%r2
BYTE $0x42
ANDW $0xFFFFFFF0, R3
ANDW $0xFFFFFFF0, R2
BYTE $0x18 //lr %r5,%r1
BYTE $0x51
MOVW R1, R7
CMPBLE R7, $22, L7
WORD $0xEC63000F //risbgn %r6,%r3,64-64+0,64-64+0+16-1,64-0-16
BYTE $0x30
BYTE $0x59
WORD $0xEC82101F //risbgn %r8,%r2,64-64+16,64-64+16+16-1,64-16-16
BYTE $0x20
BYTE $0x59
L2:
MOVH R5, R5
MOVH $0x7FEF, R1
CMPW R5, R1
BGT L1
WORD $0xB3C10026 //ldgr %f2,%r6
FMUL F2, F0
WORD $0xEC4439BB //risbg %r4,%r4,57,128+59,3
BYTE $0x03
BYTE $0x55
FMOVD 80(R9), F2
MOVD logxm<>+0(SB), R7
ADD R7, R4
FMOVD 72(R9), F4
WORD $0xED004000 //madb %f2,%f0,0(%r4)
BYTE $0x20
BYTE $0x1E
FMOVD 64(R9), F1
FMOVD F2, F0
FMOVD 56(R9), F2
WFMADB V0, V2, V4, V2
WFMDB V0, V0, V6
FMOVD 48(R9), F4
WFMADB V0, V2, V4, V2
FMOVD 40(R9), F4
WFMADB V2, V6, V1, V2
FMOVD 32(R9), F1
WFMADB V6, V4, V1, V4
FMOVD 24(R9), F1
WFMADB V6, V2, V1, V2
FMOVD 16(R9), F1
WFMADB V6, V4, V1, V4
MOVD logxl1<>+0(SB), R1
FMOVD 8(R9), F1
WFMADB V6, V2, V1, V2
FMOVD 0(R9), F1
WFMADB V6, V4, V1, V4
FMOVD 8(R4), F1
WFMADB V0, V2, V4, V2
WORD $0xB3C10048 //ldgr %f4,%r8
WFMADB V6, V2, V0, V2
WORD $0xED401000 //msdb %f1,%f4,0(%r1)
BYTE $0x10
BYTE $0x1F
MOVD ·logxl2<>+0(SB), R1
WORD $0xB3130001 //lcdbr %f0,%f1
WORD $0xB3C10041 //ldgr %f4,%r1
WFMADB V0, V4, V2, V0
L1:
FMOVD F0, ret+8(FP)
RET
L3:
WORD $0xB3120000 //ltdbr %f0,%f0
BEQ L20
BGE L1
BVS L1
MOVD logxnan<>+0(SB), R1
FMOVD 0(R1), F0
BR L1
L20:
MOVD logxminf<>+0(SB), R1
FMOVD 0(R1), F0
FMOVD F0, ret+8(FP)
RET
L17:
BYTE $0x18 //lr %r5,%r1
BYTE $0x51
BR L2
......@@ -35,7 +35,9 @@ func isOddInt(x float64) bool {
// Pow(+Inf, y) = +0 for y < 0
// Pow(-Inf, y) = Pow(-0, -y)
// Pow(x, y) = NaN for finite x < 0 and finite non-integer y
func Pow(x, y float64) float64 {
func Pow(x, y float64) float64
func pow(x, y float64) float64 {
switch {
case y == 0 || x == 1:
return 1
......
This diff is collapsed.
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32 arm
#include "textflag.h"
TEXT ·Pow(SB),NOSPLIT,$0
JMP ·pow(SB)
......@@ -12,15 +12,33 @@ TEXT ·Asin(SB),NOSPLIT,$0
TEXT ·Acos(SB),NOSPLIT,$0
B ·acos(SB)
TEXT ·Asinh(SB),NOSPLIT,$0
B ·asinh(SB)
TEXT ·Acosh(SB),NOSPLIT,$0
B ·acosh(SB)
TEXT ·Atan2(SB),NOSPLIT,$0
B ·atan2(SB)
TEXT ·Atan(SB),NOSPLIT,$0
B ·atan(SB)
TEXT ·Atanh(SB),NOSPLIT,$0
B ·atanh(SB)
TEXT ·Exp2(SB),NOSPLIT,$0
B ·exp2(SB)
TEXT ·Erf(SB),NOSPLIT,$0
B ·erf(SB)
TEXT ·Erfc(SB),NOSPLIT,$0
B ·erfc(SB)
TEXT ·Cbrt(SB),NOSPLIT,$0
B ·cbrt(SB)
TEXT ·Cosh(SB),NOSPLIT,$0
B ·cosh(SB)
......@@ -71,3 +89,6 @@ TEXT ·Tan(SB),NOSPLIT,$0
TEXT ·Tanh(SB),NOSPLIT,$0
B ·tanh(SB)
TEXT ·Pow(SB),NOSPLIT,$0
B ·pow(SB)
......@@ -12,12 +12,21 @@ TEXT ·Asin(SB),NOSPLIT,$0
TEXT ·Acos(SB),NOSPLIT,$0
JMP ·acos(SB)
TEXT ·Asinh(SB),NOSPLIT,$0
JMP ·asinh(SB)
TEXT ·Acosh(SB),NOSPLIT,$0
JMP ·acosh(SB)
TEXT ·Atan2(SB),NOSPLIT,$0
JMP ·atan2(SB)
TEXT ·Atan(SB),NOSPLIT,$0
JMP ·atan(SB)
TEXT ·Atanh(SB),NOSPLIT,$0
JMP ·atanh(SB)
TEXT ·Dim(SB),NOSPLIT,$0
JMP ·dim(SB)
......@@ -27,6 +36,12 @@ TEXT ·Min(SB),NOSPLIT,$0
TEXT ·Max(SB),NOSPLIT,$0
JMP ·max(SB)
TEXT ·Erf(SB),NOSPLIT,$0
JMP ·erf(SB)
TEXT ·Erfc(SB),NOSPLIT,$0
JMP ·erfc(SB)
TEXT ·Exp2(SB),NOSPLIT,$0
JMP ·exp2(SB)
......@@ -95,3 +110,9 @@ TEXT ·Tan(SB),NOSPLIT,$0
TEXT ·Tanh(SB),NOSPLIT,$0
JMP ·tanh(SB)
TEXT ·Cbrt(SB),NOSPLIT,$0
JMP ·cbrt(SB)
TEXT ·Pow(SB),NOSPLIT,$0
JMP ·pow(SB)
......@@ -12,12 +12,21 @@ TEXT ·Asin(SB),NOSPLIT,$0
TEXT ·Acos(SB),NOSPLIT,$0
JMP ·acos(SB)
TEXT ·Asinh(SB),NOSPLIT,$0
JMP ·asinh(SB)
TEXT ·Acosh(SB),NOSPLIT,$0
JMP ·acosh(SB)
TEXT ·Atan2(SB),NOSPLIT,$0
JMP ·atan2(SB)
TEXT ·Atan(SB),NOSPLIT,$0
JMP ·atan(SB)
TEXT ·Atanh(SB),NOSPLIT,$0
JMP ·atanh(SB)
TEXT ·Dim(SB),NOSPLIT,$0
JMP ·dim(SB)
......@@ -27,6 +36,12 @@ TEXT ·Min(SB),NOSPLIT,$0
TEXT ·Max(SB),NOSPLIT,$0
JMP ·max(SB)
TEXT ·Erf(SB),NOSPLIT,$0
JMP ·erf(SB)
TEXT ·Erfc(SB),NOSPLIT,$0
JMP ·erfc(SB)
TEXT ·Exp2(SB),NOSPLIT,$0
JMP ·exp2(SB)
......@@ -93,3 +108,9 @@ TEXT ·Tan(SB),NOSPLIT,$0
TEXT ·Tanh(SB),NOSPLIT,$0
JMP ·tanh(SB)
TEXT ·Cbrt(SB),NOSPLIT,$0
JMP ·cbrt(SB)
TEXT ·Pow(SB),NOSPLIT,$0
JMP ·pow(SB)
......@@ -12,12 +12,21 @@ TEXT ·Asin(SB),NOSPLIT,$0
TEXT ·Acos(SB),NOSPLIT,$0
BR ·acos(SB)
TEXT ·Asinh(SB),NOSPLIT,$0
BR ·asinh(SB)
TEXT ·Acosh(SB),NOSPLIT,$0
BR ·acosh(SB)
TEXT ·Atan2(SB),NOSPLIT,$0
BR ·atan2(SB)
TEXT ·Atan(SB),NOSPLIT,$0
BR ·atan(SB)
TEXT ·Atanh(SB),NOSPLIT,$0
BR ·atanh(SB)
TEXT ·Dim(SB),NOSPLIT,$0
BR ·dim(SB)
......@@ -27,6 +36,12 @@ TEXT ·Min(SB),NOSPLIT,$0
TEXT ·Max(SB),NOSPLIT,$0
BR ·max(SB)
TEXT ·Erf(SB),NOSPLIT,$0
BR ·erf(SB)
TEXT ·Erfc(SB),NOSPLIT,$0
BR ·erfc(SB)
TEXT ·Exp2(SB),NOSPLIT,$0
BR ·exp2(SB)
......@@ -83,3 +98,10 @@ TEXT ·Tan(SB),NOSPLIT,$0
TEXT ·Tanh(SB),NOSPLIT,$0
BR ·tanh(SB)
TEXT ·Cbrt(SB),NOSPLIT,$0
BR ·cbrt(SB)
TEXT ·Pow(SB),NOSPLIT,$0
BR ·pow(SB)
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment