Commit 5a6c5809 authored by Martin Möhrmann's avatar Martin Möhrmann

runtime: refactor cpu feature detection for 386 & amd64

Changes all cpu features to be detected and stored in bools in rt0_go.

Updates: #15403

Change-Id: I5a9961cdec789b331d09c44d86beb53833d5dc3e
Reviewed-on: https://go-review.googlesource.com/41950
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIlya Tocar <ilya.tocar@intel.com>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 1f85d3ad
...@@ -283,9 +283,9 @@ func alginit() { ...@@ -283,9 +283,9 @@ func alginit() {
// Install aes hash algorithm if we have the instructions we need // Install aes hash algorithm if we have the instructions we need
if (GOARCH == "386" || GOARCH == "amd64") && if (GOARCH == "386" || GOARCH == "amd64") &&
GOOS != "nacl" && GOOS != "nacl" &&
cpuid_ecx&(1<<25) != 0 && // aes (aesenc) support_aes && // AESENC
cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb) support_ssse3 && // PSHUFB
cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q}) support_sse41 { // PINSR{D,Q}
useAeshash = true useAeshash = true
algarray[alg_MEM32].hash = aeshash32 algarray[alg_MEM32].hash = aeshash32
algarray[alg_MEM64].hash = aeshash64 algarray[alg_MEM64].hash = aeshash64
......
...@@ -75,24 +75,81 @@ notintel: ...@@ -75,24 +75,81 @@ notintel:
MOVL $1, AX MOVL $1, AX
CPUID CPUID
MOVL CX, DI // Move to global variable clobbers CX when generating PIC MOVL CX, DI // Move to global variable clobbers CX when generating PIC
MOVL AX, runtime·cpuid_eax(SB) MOVL AX, runtime·processorVersionInfo(SB)
MOVL DI, runtime·cpuid_ecx(SB) MOVL DI, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB) MOVL DX, runtime·cpuid_edx(SB)
// Check for MMX support // Check for MMX support
TESTL $(1<<23), DX // MMX TESTL $(1<<23), DX // MMX
JZ bad_proc JZ bad_proc
TESTL $(1<<26), DX // SSE2
SETNE runtime·support_sse2(SB)
TESTL $(1<<9), DI // SSSE3
SETNE runtime·support_ssse3(SB)
TESTL $(1<<19), DI // SSE4.1
SETNE runtime·support_sse41(SB)
TESTL $(1<<20), DI // SSE4.2
SETNE runtime·support_sse42(SB)
TESTL $(1<<23), DI // POPCNT
SETNE runtime·support_popcnt(SB)
TESTL $(1<<25), DI // AES
SETNE runtime·support_aes(SB)
TESTL $(1<<27), DI // OSXSAVE
SETNE runtime·support_osxsave(SB)
// If OS support for XMM and YMM is not present
// support_avx will be set back to false later.
TESTL $(1<<28), DI // AVX
SETNE runtime·support_avx(SB)
eax7:
// Load EAX=7/ECX=0 cpuid flags // Load EAX=7/ECX=0 cpuid flags
CMPL SI, $7 CMPL SI, $7
JLT nocpuinfo JLT osavx
MOVL $7, AX MOVL $7, AX
MOVL $0, CX MOVL $0, CX
CPUID CPUID
MOVL BX, runtime·cpuid_ebx7(SB) MOVL BX, runtime·cpuid_ebx7(SB)
nocpuinfo: TESTL $(1<<3), BX // BMI1
SETNE runtime·support_bmi1(SB)
// If OS support for XMM and YMM is not present
// support_avx2 will be set back to false later.
TESTL $(1<<5), BX
SETNE runtime·support_avx2(SB)
TESTL $(1<<8), BX // BMI2
SETNE runtime·support_bmi2(SB)
TESTL $(1<<9), BX // ERMS
SETNE runtime·support_erms(SB)
osavx:
// nacl does not support XGETBV to test
// for XMM and YMM OS support.
#ifndef GOOS_nacl
CMPB runtime·support_osxsave(SB), $1
JNE noavx
MOVL $0, CX
// For XGETBV, OSXSAVE bit is required and sufficient
XGETBV
ANDL $6, AX
CMPL AX, $6 // Check for OS support of XMM and YMM registers.
JE nocpuinfo
#endif
noavx:
MOVB $0, runtime·support_avx(SB)
MOVB $0, runtime·support_avx2(SB)
nocpuinfo:
// if there is an _cgo_init, call it to let it // if there is an _cgo_init, call it to let it
// initialize and to set up GS. if not, // initialize and to set up GS. if not,
// we set up GS ourselves. // we set up GS ourselves.
...@@ -803,8 +860,8 @@ TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8 ...@@ -803,8 +860,8 @@ TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
// func cputicks() int64 // func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-8 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
TESTL $0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence CMPB runtime·support_sse2(SB), $1
JEQ done JNE done
CMPB runtime·lfenceBeforeRdtsc(SB), $1 CMPB runtime·lfenceBeforeRdtsc(SB), $1
JNE mfence JNE mfence
BYTE $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE BYTE $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
...@@ -1311,8 +1368,8 @@ TEXT runtime·memeqbody(SB),NOSPLIT,$0-0 ...@@ -1311,8 +1368,8 @@ TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
hugeloop: hugeloop:
CMPL BX, $64 CMPL BX, $64
JB bigloop JB bigloop
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 CMPB runtime·support_sse2(SB), $1
JE bigloop JNE bigloop
MOVOU (SI), X0 MOVOU (SI), X0
MOVOU (DI), X1 MOVOU (DI), X1
MOVOU 16(SI), X2 MOVOU 16(SI), X2
...@@ -1455,8 +1512,8 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 ...@@ -1455,8 +1512,8 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
JEQ allsame JEQ allsame
CMPL BP, $4 CMPL BP, $4
JB small JB small
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 CMPB runtime·support_sse2(SB), $1
JE mediumloop JNE mediumloop
largeloop: largeloop:
CMPL BP, $16 CMPL BP, $16
JB mediumloop JB mediumloop
......
...@@ -26,10 +26,10 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0 ...@@ -26,10 +26,10 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
MOVQ SP, (g_stack+stack_hi)(DI) MOVQ SP, (g_stack+stack_hi)(DI)
// find out information about the processor we're on // find out information about the processor we're on
MOVQ $0, AX MOVL $0, AX
CPUID CPUID
MOVQ AX, SI MOVL AX, SI
CMPQ AX, $0 CMPL AX, $0
JE nocpuinfo JE nocpuinfo
// Figure out how to serialize RDTSC. // Figure out how to serialize RDTSC.
...@@ -46,62 +46,75 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0 ...@@ -46,62 +46,75 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
notintel: notintel:
// Load EAX=1 cpuid flags // Load EAX=1 cpuid flags
MOVQ $1, AX MOVL $1, AX
CPUID CPUID
MOVL AX, runtime·cpuid_eax(SB) MOVL AX, runtime·processorVersionInfo(SB)
MOVL CX, runtime·cpuid_ecx(SB) MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB) MOVL DX, runtime·cpuid_edx(SB)
TESTL $(1<<26), DX // SSE2
SETNE runtime·support_sse2(SB)
TESTL $(1<<9), CX // SSSE3
SETNE runtime·support_ssse3(SB)
TESTL $(1<<19), CX // SSE4.1
SETNE runtime·support_sse41(SB)
TESTL $(1<<20), CX // SSE4.2
SETNE runtime·support_sse42(SB)
TESTL $(1<<23), CX // POPCNT
SETNE runtime·support_popcnt(SB)
TESTL $(1<<25), CX // AES
SETNE runtime·support_aes(SB)
TESTL $(1<<27), CX // OSXSAVE
SETNE runtime·support_osxsave(SB)
// If OS support for XMM and YMM is not present
// support_avx will be set back to false later.
TESTL $(1<<28), CX // AVX
SETNE runtime·support_avx(SB)
eax7:
// Load EAX=7/ECX=0 cpuid flags // Load EAX=7/ECX=0 cpuid flags
CMPQ SI, $7 CMPL SI, $7
JLT no7 JLT osavx
MOVL $7, AX MOVL $7, AX
MOVL $0, CX MOVL $0, CX
CPUID CPUID
MOVL BX, runtime·cpuid_ebx7(SB) MOVL BX, runtime·cpuid_ebx7(SB)
no7:
// Detect AVX and AVX2 as per 14.7.1 Detection of AVX2 chapter of [1] TESTL $(1<<3), BX // BMI1
// [1] 64-ia-32-architectures-software-developer-manual-325462.pdf SETNE runtime·support_bmi1(SB)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
MOVL runtime·cpuid_ecx(SB), CX // If OS support for XMM and YMM is not present
ANDL $0x18000000, CX // check for OSXSAVE and AVX bits // support_avx2 will be set back to false later.
CMPL CX, $0x18000000 TESTL $(1<<5), BX
JNE noavx SETNE runtime·support_avx2(SB)
MOVL $0, CX
TESTL $(1<<8), BX // BMI2
SETNE runtime·support_bmi2(SB)
TESTL $(1<<9), BX // ERMS
SETNE runtime·support_erms(SB)
osavx:
CMPB runtime·support_osxsave(SB), $1
JNE noavx
MOVL $0, CX
// For XGETBV, OSXSAVE bit is required and sufficient // For XGETBV, OSXSAVE bit is required and sufficient
XGETBV XGETBV
ANDL $6, AX ANDL $6, AX
CMPL AX, $6 // Check for OS support of YMM registers CMPL AX, $6 // Check for OS support of XMM and YMM registers.
JNE noavx JE nocpuinfo
MOVB $1, runtime·support_avx(SB)
TESTL $(1<<5), runtime·cpuid_ebx7(SB) // check for AVX2 bit
JEQ noavx2
MOVB $1, runtime·support_avx2(SB)
JMP testbmi1
noavx: noavx:
MOVB $0, runtime·support_avx(SB) MOVB $0, runtime·support_avx(SB)
noavx2: MOVB $0, runtime·support_avx2(SB)
MOVB $0, runtime·support_avx2(SB)
testbmi1: nocpuinfo:
// Detect BMI1 and BMI2 extensions as per
// 5.1.16.1 Detection of VEX-encoded GPR Instructions,
// LZCNT and TZCNT, PREFETCHW chapter of [1]
MOVB $0, runtime·support_bmi1(SB)
TESTL $(1<<3), runtime·cpuid_ebx7(SB) // check for BMI1 bit
JEQ testbmi2
MOVB $1, runtime·support_bmi1(SB)
testbmi2:
MOVB $0, runtime·support_bmi2(SB)
TESTL $(1<<8), runtime·cpuid_ebx7(SB) // check for BMI2 bit
JEQ testpopcnt
MOVB $1, runtime·support_bmi2(SB)
testpopcnt:
MOVB $0, runtime·support_popcnt(SB)
TESTL $(1<<23), runtime·cpuid_ecx(SB) // check for POPCNT bit
JEQ nocpuinfo
MOVB $1, runtime·support_popcnt(SB)
nocpuinfo:
// if there is an _cgo_init, call it. // if there is an _cgo_init, call it.
MOVQ _cgo_init(SB), AX MOVQ _cgo_init(SB), AX
TESTQ AX, AX TESTQ AX, AX
...@@ -1942,9 +1955,8 @@ success_avx2: ...@@ -1942,9 +1955,8 @@ success_avx2:
VZEROUPPER VZEROUPPER
JMP success JMP success
sse42: sse42:
MOVL runtime·cpuid_ecx(SB), CX CMPB runtime·support_sse42(SB), $1
ANDL $0x100000, CX JNE no_sse42
JZ no_sse42
CMPQ AX, $12 CMPQ AX, $12
// PCMPESTRI is slower than normal compare, // PCMPESTRI is slower than normal compare,
// so using it makes sense only if we advance 4+ bytes per compare // so using it makes sense only if we advance 4+ bytes per compare
......
...@@ -28,9 +28,9 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0 ...@@ -28,9 +28,9 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
MOVL SP, (g_stack+stack_hi)(DI) MOVL SP, (g_stack+stack_hi)(DI)
// find out information about the processor we're on // find out information about the processor we're on
MOVQ $0, AX MOVL $0, AX
CPUID CPUID
CMPQ AX, $0 CMPL AX, $0
JE nocpuinfo JE nocpuinfo
CMPL BX, $0x756E6547 // "Genu" CMPL BX, $0x756E6547 // "Genu"
...@@ -42,13 +42,81 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0 ...@@ -42,13 +42,81 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
MOVB $1, runtime·isIntel(SB) MOVB $1, runtime·isIntel(SB)
notintel: notintel:
MOVQ $1, AX // Load EAX=1 cpuid flags
MOVL $1, AX
CPUID CPUID
MOVL AX, runtime·cpuid_eax(SB) MOVL AX, runtime·processorVersionInfo(SB)
MOVL CX, runtime·cpuid_ecx(SB) MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB) MOVL DX, runtime·cpuid_edx(SB)
nocpuinfo:
TESTL $(1<<26), DX // SSE2
SETNE runtime·support_sse2(SB)
TESTL $(1<<9), CX // SSSE3
SETNE runtime·support_ssse3(SB)
TESTL $(1<<19), CX // SSE4.1
SETNE runtime·support_sse41(SB)
TESTL $(1<<20), CX // SSE4.2
SETNE runtime·support_sse42(SB)
TESTL $(1<<23), CX // POPCNT
SETNE runtime·support_popcnt(SB)
TESTL $(1<<25), CX // AES
SETNE runtime·support_aes(SB)
TESTL $(1<<27), CX // OSXSAVE
SETNE runtime·support_osxsave(SB)
// If OS support for XMM and YMM is not present
// support_avx will be set back to false later.
TESTL $(1<<28), CX // AVX
SETNE runtime·support_avx(SB)
eax7:
// Load EAX=7/ECX=0 cpuid flags
CMPL SI, $7
JLT osavx
MOVL $7, AX
MOVL $0, CX
CPUID
MOVL BX, runtime·cpuid_ebx7(SB)
TESTL $(1<<3), BX // BMI1
SETNE runtime·support_bmi1(SB)
// If OS support for XMM and YMM is not present
// support_avx2 will be set back to false later.
TESTL $(1<<5), BX
SETNE runtime·support_avx2(SB)
TESTL $(1<<8), BX // BMI2
SETNE runtime·support_bmi2(SB)
TESTL $(1<<9), BX // ERMS
SETNE runtime·support_erms(SB)
osavx:
// nacl does not support XGETBV to test
// for XMM and YMM OS support.
#ifndef GOOS_nacl
CMPB runtime·support_osxsave(SB), $1
JNE noavx
MOVL $0, CX
// For XGETBV, OSXSAVE bit is required and sufficient
XGETBV
ANDL $6, AX
CMPL AX, $6 // Check for OS support of XMM and YMM registers.
JE nocpuinfo
#endif
noavx:
MOVB $0, runtime·support_avx(SB)
MOVB $0, runtime·support_avx2(SB)
nocpuinfo:
needtls: needtls:
LEAL runtime·m0+m_tls(SB), DI LEAL runtime·m0+m_tls(SB), DI
CALL runtime·settls(SB) CALL runtime·settls(SB)
......
...@@ -8,13 +8,13 @@ var useAVXmemmove bool ...@@ -8,13 +8,13 @@ var useAVXmemmove bool
func init() { func init() {
// Let's remove stepping and reserved fields // Let's remove stepping and reserved fields
processorVersionInfo := cpuid_eax & 0x0FFF3FF0 processor := processorVersionInfo & 0x0FFF3FF0
isIntelBridgeFamily := isIntel && isIntelBridgeFamily := isIntel &&
(processorVersionInfo == 0x206A0 || processor == 0x206A0 ||
processorVersionInfo == 0x206D0 || processor == 0x206D0 ||
processorVersionInfo == 0x306A0 || processor == 0x306A0 ||
processorVersionInfo == 0x306E0) processor == 0x306E0
useAVXmemmove = support_avx && !isIntelBridgeFamily useAVXmemmove = support_avx && !isIntelBridgeFamily
} }
...@@ -27,8 +27,8 @@ tail: ...@@ -27,8 +27,8 @@ tail:
JBE _5through8 JBE _5through8
CMPL BX, $16 CMPL BX, $16
JBE _9through16 JBE _9through16
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 CMPB runtime·support_sse2(SB), $1
JEQ nosse2 JNE nosse2
PXOR X0, X0 PXOR X0, X0
CMPL BX, $32 CMPL BX, $32
JBE _17through32 JBE _17through32
......
...@@ -49,8 +49,8 @@ tail: ...@@ -49,8 +49,8 @@ tail:
JBE move_5through8 JBE move_5through8
CMPL BX, $16 CMPL BX, $16
JBE move_9through16 JBE move_9through16
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 CMPB runtime·support_sse2(SB), $1
JEQ nosse2 JNE nosse2
CMPL BX, $32 CMPL BX, $32
JBE move_17through32 JBE move_17through32
CMPL BX, $64 CMPL BX, $64
...@@ -71,8 +71,8 @@ nosse2: ...@@ -71,8 +71,8 @@ nosse2:
*/ */
forward: forward:
// If REP MOVSB isn't fast, don't use it // If REP MOVSB isn't fast, don't use it
TESTL $(1<<9), runtime·cpuid_ebx7(SB) // erms, aka enhanced REP MOVSB/STOSB CMPB runtime·support_erms(SB), $1 // enhanced REP MOVSB/STOSB
JEQ fwdBy4 JNE fwdBy4
// Check alignment // Check alignment
MOVL SI, AX MOVL SI, AX
......
...@@ -81,8 +81,8 @@ forward: ...@@ -81,8 +81,8 @@ forward:
JLS move_256through2048 JLS move_256through2048
// If REP MOVSB isn't fast, don't use it // If REP MOVSB isn't fast, don't use it
TESTL $(1<<9), runtime·cpuid_ebx7(SB) // erms, aka enhanced REP MOVSB/STOSB CMPB runtime·support_erms(SB), $1 // enhanced REP MOVSB/STOSB
JEQ fwdBy8 JNE fwdBy8
// Check alignment // Check alignment
MOVL SI, AX MOVL SI, AX
......
...@@ -728,17 +728,29 @@ var ( ...@@ -728,17 +728,29 @@ var (
// Information about what cpu features are available. // Information about what cpu features are available.
// Set on startup in asm_{386,amd64,amd64p32}.s. // Set on startup in asm_{386,amd64,amd64p32}.s.
cpuid_eax uint32 // Packages outside the runtime should not use these
cpuid_ecx uint32 // as they are not an external api.
cpuid_edx uint32 processorVersionInfo uint32
cpuid_ebx7 uint32 // not set on amd64p32 isIntel bool
isIntel bool lfenceBeforeRdtsc bool
lfenceBeforeRdtsc bool support_aes bool
support_avx bool support_avx bool
support_avx2 bool support_avx2 bool
support_bmi1 bool support_bmi1 bool
support_bmi2 bool support_bmi2 bool
support_popcnt bool support_erms bool
support_osxsave bool
support_popcnt bool
support_sse2 bool
support_sse41 bool
support_sse42 bool
support_ssse3 bool
// TODO(moehrmann) delete below variables once external
// packages have their dependencies on these removed.
cpuid_ecx uint32
cpuid_edx uint32
cpuid_ebx7 uint32 // not set on amd64p32
goarm uint8 // set by cmd/link on arm systems goarm uint8 // set by cmd/link on arm systems
framepointer_enabled bool // set by cmd/link framepointer_enabled bool // set by cmd/link
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment