Commit 0157c72d authored by Shenghou Ma

runtime: inline several float64 routines to speed up complex128 division

Depends on CL 6197045.

Result obtained on Core i7 620M, Darwin/amd64:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           57           28  -50.78%
BenchmarkComplex128DivNisNaN           49           15  -68.90%
BenchmarkComplex128DivDisNaN           49           15  -67.88%
BenchmarkComplex128DivNisInf           40           12  -68.50%
BenchmarkComplex128DivDisInf           33           13  -61.06%

Result obtained on Core i7 620M, Darwin/386:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           89           50  -44.05%
BenchmarkComplex128DivNisNaN          307          802  +161.24%
BenchmarkComplex128DivDisNaN          309          788  +155.02%
BenchmarkComplex128DivNisInf          278          237  -14.75%
BenchmarkComplex128DivDisInf           46           22  -52.46%

Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal         1557          465  -70.13%
BenchmarkComplex128DivNisNaN         1443          220  -84.75%
BenchmarkComplex128DivDisNaN         1481          218  -85.28%
BenchmarkComplex128DivNisInf          952          216  -77.31%
BenchmarkComplex128DivDisInf          861          231  -73.17%

The 386 version shows a performance regression in the NaN cases, but
since we have decided to use SSE2 instead of the x87 FPU on 386 as well
(issue 3912), I won't address that regression in this change.

R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
parent c8423f90
...@@ -13,28 +13,30 @@ runtime·complex128div(Complex128 n, Complex128 d, Complex128 q) ...@@ -13,28 +13,30 @@ runtime·complex128div(Complex128 n, Complex128 d, Complex128 q)
float64 a, b, ratio, denom; float64 a, b, ratio, denom;
// Special cases as in C99. // Special cases as in C99.
ninf = runtime·isInf(n.real, 0) || runtime·isInf(n.imag, 0); ninf = n.real == runtime·posinf || n.real == runtime·neginf ||
dinf = runtime·isInf(d.real, 0) || runtime·isInf(d.imag, 0); n.imag == runtime·posinf || n.imag == runtime·neginf;
dinf = d.real == runtime·posinf || d.real == runtime·neginf ||
d.imag == runtime·posinf || d.imag == runtime·neginf;
nnan = !ninf && (runtime·isNaN(n.real) || runtime·isNaN(n.imag)); nnan = !ninf && (ISNAN(n.real) || ISNAN(n.imag));
dnan = !dinf && (runtime·isNaN(d.real) || runtime·isNaN(d.imag)); dnan = !dinf && (ISNAN(d.real) || ISNAN(d.imag));
if(nnan || dnan) { if(nnan || dnan) {
q.real = runtime·NaN(); q.real = runtime·nan;
q.imag = runtime·NaN(); q.imag = runtime·nan;
} else if(ninf && !dinf && !dnan) { } else if(ninf && !dinf) {
q.real = runtime·Inf(0); q.real = runtime·posinf;
q.imag = runtime·Inf(0); q.imag = runtime·posinf;
} else if(!ninf && !nnan && dinf) { } else if(!ninf && dinf) {
q.real = 0; q.real = 0;
q.imag = 0; q.imag = 0;
} else if(d.real == 0 && d.imag == 0) { } else if(d.real == 0 && d.imag == 0) {
if(n.real == 0 && n.imag == 0) { if(n.real == 0 && n.imag == 0) {
q.real = runtime·NaN(); q.real = runtime·nan;
q.imag = runtime·NaN(); q.imag = runtime·nan;
} else { } else {
q.real = runtime·Inf(0); q.real = runtime·posinf;
q.imag = runtime·Inf(0); q.imag = runtime·posinf;
} }
} else { } else {
// Standard complex arithmetic, factored to avoid unnecessary overflow. // Standard complex arithmetic, factored to avoid unnecessary overflow.
......
...@@ -4,170 +4,7 @@ ...@@ -4,170 +4,7 @@
#include "runtime.h" #include "runtime.h"
static uint64 uvnan = 0x7FF8000000000001ULL; // used as float64 via runtime· names
static uint64 uvinf = 0x7FF0000000000000ULL; uint64 ·nan = 0x7FF8000000000001ULL;
static uint64 uvneginf = 0xFFF0000000000000ULL; uint64 ·posinf = 0x7FF0000000000000ULL;
uint64 ·neginf = 0xFFF0000000000000ULL;
// runtime·float32tobits returns the bits stored for f, read back as a uint32.
uint32
runtime·float32tobits(float32 f)
{
	// Go through a union rather than a casted pointer: the pointer
	// form is technically not valid C, and gcc miscompiles it.
	union {
		float32 val;
		uint32 bits;
	} pun;

	pun.val = f;
	return pun.bits;
}
// runtime·float64tobits returns the bits stored for f, read back as a uint64.
uint64
runtime·float64tobits(float64 f)
{
	// Go through a union rather than a casted pointer: the pointer
	// form is technically not valid C, and gcc miscompiles it.
	union {
		float64 val;
		uint64 bits;
	} pun;

	pun.val = f;
	return pun.bits;
}
// runtime·float64frombits returns the float64 whose stored bits are i.
float64
runtime·float64frombits(uint64 i)
{
	// Go through a union rather than a casted pointer: the pointer
	// form is technically not valid C, and gcc miscompiles it.
	union {
		float64 val;
		uint64 bits;
	} pun;

	pun.bits = i;
	return pun.val;
}
// runtime·float32frombits returns the float32 whose stored bits are i.
float32
runtime·float32frombits(uint32 i)
{
	// Go through a union rather than a casted pointer: the pointer
	// form is technically not valid C, and gcc miscompiles it.
	union {
		float32 val;
		uint32 bits;
	} pun;

	pun.bits = i;
	return pun.val;
}
// runtime·isInf reports whether the bit pattern of f equals one of the
// infinity patterns, selected by sign:
//	sign > 0: only uvinf matches
//	sign < 0: only uvneginf matches
//	sign == 0: either pattern matches
bool
runtime·isInf(float64 f, int32 sign)
{
	uint64 bits;

	bits = runtime·float64tobits(f);
	if(sign > 0)
		return bits == uvinf;
	if(sign < 0)
		return bits == uvneginf;
	return bits == uvinf || bits == uvneginf;
}
// runtime·NaN returns the float64 whose bit pattern is uvnan.
float64
runtime·NaN(void)
{
	float64 v;

	v = runtime·float64frombits(uvnan);
	return v;
}
// runtime·isNaN reports whether f has all eleven exponent bits set
// (bits 52..62 of its pattern) and is not one of the two infinities.
bool
runtime·isNaN(float64 f)
{
	uint64 bits;
	uint32 expbits;

	bits = runtime·float64tobits(f);
	expbits = (uint32)(bits>>52) & 0x7FF;
	return expbits == 0x7FF && !runtime·isInf(f, 0);
}
// runtime·Inf returns the float64 built from uvinf when sign >= 0,
// and the one built from uvneginf when sign < 0.
float64
runtime·Inf(int32 sign)
{
	uint64 bits;

	bits = uvinf;
	if(sign < 0)
		bits = uvneginf;
	return runtime·float64frombits(bits);
}
// Constants used by frexp, ldexp and modf to pick apart the
// exponent field of a float64 bit pattern.
enum
{
// MASK selects 11 bits once the pattern has been shifted right by SHIFT.
MASK = 0x7ffL,
// SHIFT (52) is the bit position at which the exponent field starts.
SHIFT = 64-11-1,
// BIAS is subtracted from the stored exponent by frexp and modf,
// and is the exponent value frexp writes into its result.
BIAS = 1022L,
};
// runtime·frexp splits d into an exponent and a fraction.
// It stores (exponent field of d) - BIAS in *ep and returns d with its
// exponent field overwritten by BIAS; sign and mantissa bits are kept.
// Zero is the only special case: *ep is set to 0 and 0 is returned.
float64
runtime·frexp(float64 d, int32 *ep)
{
	uint64 bits, expfield;

	if(d == 0) {
		*ep = 0;
		return 0;
	}

	bits = runtime·float64tobits(d);
	expfield = (uint64)MASK << SHIFT;
	*ep = (int32)((bits >> SHIFT) & MASK) - BIAS;
	// Swap the stored exponent for BIAS, leaving every other bit alone.
	bits = (bits & ~expfield) | ((uint64)BIAS << SHIFT);
	return runtime·float64frombits(bits);
}
// runtime·ldexp adds e to the exponent field of d and returns the result.
// A zero d yields 0. If the adjusted exponent is <= 0 the result is 0
// (underflow); if it is >= MASK the result is an infinity whose sign
// follows d < 0 (overflow).
float64
runtime·ldexp(float64 d, int32 e)
{
	uint64 bits;

	if(d == 0)
		return 0;

	bits = runtime·float64tobits(d);
	e += (int32)(bits >> SHIFT) & MASK;
	if(e <= 0)
		return 0;	/* underflow */
	if(e >= MASK)	/* overflow */
		return runtime·Inf(d < 0 ? -1 : 1);

	// Replace the exponent field with the adjusted value.
	bits = (bits & ~((uint64)MASK << SHIFT)) | ((uint64)e << SHIFT);
	return runtime·float64frombits(bits);
}
// runtime·modf splits d into an integer part, stored in *ip, and a
// fractional part, which is returned.
// Negative inputs are handled by recursing on -d and negating both parts.
// Inputs in [0, 1) yield *ip = 0 and return d unchanged.
float64
runtime·modf(float64 d, float64 *ip)
{
	float64 whole, frac;
	uint64 bits;
	int32 binexp;

	if(d < 0) {
		frac = runtime·modf(-d, ip);
		*ip = -*ip;
		return -frac;
	}
	if(d < 1) {
		*ip = 0;
		return d;
	}

	bits = runtime·float64tobits(d);
	binexp = (int32)((bits >> SHIFT) & MASK) - BIAS;

	/*
	 * Keep the top 11+binexp bits; clear the rest.
	 */
	if(binexp <= 64-11)
		bits &= ~(((uint64)1 << (64LL-11LL-binexp))-1);

	whole = runtime·float64frombits(bits);
	*ip = whole;
	return d - whole;
}
...@@ -209,15 +209,15 @@ runtime·printfloat(float64 v) ...@@ -209,15 +209,15 @@ runtime·printfloat(float64 v)
int32 e, s, i, n; int32 e, s, i, n;
float64 h; float64 h;
if(runtime·isNaN(v)) { if(ISNAN(v)) {
gwrite("NaN", 3); gwrite("NaN", 3);
return; return;
} }
if(runtime·isInf(v, 1)) { if(v == runtime·posinf) {
gwrite("+Inf", 4); gwrite("+Inf", 4);
return; return;
} }
if(runtime·isInf(v, -1)) { if(v == runtime·neginf) {
gwrite("-Inf", 4); gwrite("-Inf", 4);
return; return;
} }
......
...@@ -815,3 +815,12 @@ uintptr runtime·memlimit(void); ...@@ -815,3 +815,12 @@ uintptr runtime·memlimit(void);
// is forced to deliver the signal to a thread that's actually running. // is forced to deliver the signal to a thread that's actually running.
// This is a no-op on other systems. // This is a no-op on other systems.
void runtime·setprof(bool); void runtime·setprof(bool);
// float.c
// Floating-point special values. float.c defines ·nan, ·posinf and
// ·neginf as uint64 bit patterns and notes that they are used as
// float64 via the runtime· names declared here.
// NOTE(review): presumably ·nan and runtime·nan (and likewise the other
// two pairs) name the same storage inside the runtime package — confirm
// against the toolchain's symbol naming rules.
extern float64 runtime·nan;
extern float64 runtime·posinf;
extern float64 runtime·neginf;
extern uint64 ·nan;
extern uint64 ·posinf;
extern uint64 ·neginf;
// ISNAN(f) is true when f compares unequal to itself, which is how a NaN
// behaves under IEEE 754 comparison. The argument is evaluated twice, so
// it must not have side effects.
#define ISNAN(f) ((f) != (f))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment