Commit 3a66bc41 authored by Russ Cox's avatar Russ Cox

runtime: use OS X vsyscall for gettimeofday (amd64)

Thanks to Dave Cheney for the magic words "comm page".

benchmark       old ns/op    new ns/op    delta
BenchmarkNow          197           33  -83.05%

This should make profiling a little better on OS X.
The raw time saved is unlikely to matter: what likely matters
more is that it seems like OS X sends profiling signals on the
way out of system calls more often than it should; avoiding
the system call should increase the accuracy of cpu profiles.

The 386 version would be similar but needs to do different
math for CPU speeds less than 1 GHz. (Apparently Apple has
never shipped a 64-bit CPU with such a slow clock.)

R=golang-dev, bradfitz, dave, minux.ma, r
CC=golang-dev
https://golang.org/cl/6275056
parent c7be4def
......@@ -65,26 +65,60 @@ TEXT runtime·madvise(SB), 7, $0
MOVL $0xf1, 0xf1 // crash
RET
// func now() (sec int64, nsec int32)
TEXT time·now(SB), 7, $32
MOVQ SP, DI // must be non-nil, unused
MOVQ $0, SI
MOVL $(0x2000000+116), AX
SYSCALL
// sec is in AX, usec in DX
MOVQ AX, sec+0(FP)
IMULQ $1000, DX
MOVL DX, nsec+8(FP)
RET
// OS X comm page time offsets
// http://www.opensource.apple.com/source/xnu/xnu-1699.26.8/osfmk/i386/cpu_capabilities.h
#define nt_tsc_base 0x50
#define nt_scale 0x58
#define nt_shift 0x5c
#define nt_ns_base 0x60
#define nt_generation 0x68
#define gtod_generation 0x6c
#define gtod_ns_base 0x70
#define gtod_sec_base 0x78
// int64 nanotime(void)
TEXT runtime·nanotime(SB), 7, $32
MOVQ $0x7fffffe00000, BP /* comm page base */
// Loop trying to take a consistent snapshot
// of the time parameters.
timeloop:
MOVL gtod_generation(BP), R8
TESTL R8, R8
JZ systime
MOVL nt_generation(BP), R9
TESTL R9, R9
JZ timeloop
RDTSC
MOVQ nt_tsc_base(BP), R10
MOVL nt_scale(BP), R11
MOVQ nt_ns_base(BP), R12
CMPL nt_generation(BP), R9
JNE timeloop
MOVQ gtod_ns_base(BP), R13
MOVQ gtod_sec_base(BP), R14
CMPL gtod_generation(BP), R8
JNE timeloop
// Gathered all the data we need. Compute time.
// ((tsc - nt_tsc_base) * nt_scale) >> 32 + nt_ns_base - gtod_ns_base + gtod_sec_base*1e9
// The multiply and shift extracts the top 64 bits of the 96-bit product.
SHLQ $32, DX
ADDQ DX, AX
SUBQ R10, AX
MULQ R11
SHRQ $32, AX:DX
ADDQ R12, AX
SUBQ R13, AX
IMULQ $1000000000, R14
ADDQ R14, AX
RET
systime:
// Fall back to system call (usually first call in this thread).
MOVQ SP, DI // must be non-nil, unused
MOVQ $0, SI
MOVL $(0x2000000+116), AX
SYSCALL
// sec is in AX, usec in DX
// return nsec in AX
IMULQ $1000000000, AX
......@@ -92,6 +126,25 @@ TEXT runtime·nanotime(SB), 7, $32
ADDQ DX, AX
RET
// func now() (sec int64, nsec int32)
TEXT time·now(SB),7,$0
CALL runtime·nanotime(SB)
// generated code for
// func f(x uint64) (uint64, uint64) { return x/1000000000, x%100000000 }
// adapted to reduce duplication
MOVQ AX, CX
MOVQ $1360296554856532783, AX
MULQ CX
ADDQ CX, DX
RCRQ $1, DX
SHRQ $29, DX
MOVQ DX, sec+0(FP)
IMULQ $1000000000, DX
SUBQ DX, CX
MOVL CX, nsec+8(FP)
RET
TEXT runtime·sigprocmask(SB),7,$0
MOVL 8(SP), DI
MOVQ 16(SP), SI
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment