Commit b64e8178 authored by Martin Möhrmann

runtime: simplify detection of preference to use AVX memmove

Reduces cmd/go by 4464 bytes on amd64.

Removes the duplicate detection of AVX support and
presence of Intel processors.

Change-Id: I4670189951a63760fae217708f68d65e94a30dc5
Reviewed-on: https://go-review.googlesource.com/41570
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 16271b8b
...@@ -67,14 +67,16 @@ has_cpuid: ...@@ -67,14 +67,16 @@ has_cpuid:
JNE notintel JNE notintel
CMPL CX, $0x6C65746E // "ntel" CMPL CX, $0x6C65746E // "ntel"
JNE notintel JNE notintel
MOVB $1, runtime·isIntel(SB)
MOVB $1, runtime·lfenceBeforeRdtsc(SB) MOVB $1, runtime·lfenceBeforeRdtsc(SB)
notintel: notintel:
// Load EAX=1 cpuid flags // Load EAX=1 cpuid flags
MOVL $1, AX MOVL $1, AX
CPUID CPUID
MOVL CX, AX // Move to global variable clobbers CX when generating PIC MOVL CX, DI // Move to global variable clobbers CX when generating PIC
MOVL AX, runtime·cpuid_ecx(SB) MOVL AX, runtime·cpuid_eax(SB)
MOVL DI, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB) MOVL DX, runtime·cpuid_edx(SB)
// Check for MMX support // Check for MMX support
......
...@@ -41,12 +41,14 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0 ...@@ -41,12 +41,14 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
JNE notintel JNE notintel
CMPL CX, $0x6C65746E // "ntel" CMPL CX, $0x6C65746E // "ntel"
JNE notintel JNE notintel
MOVB $1, runtime·isIntel(SB)
MOVB $1, runtime·lfenceBeforeRdtsc(SB) MOVB $1, runtime·lfenceBeforeRdtsc(SB)
notintel: notintel:
// Load EAX=1 cpuid flags // Load EAX=1 cpuid flags
MOVQ $1, AX MOVQ $1, AX
CPUID CPUID
MOVL AX, runtime·cpuid_eax(SB)
MOVL CX, runtime·cpuid_ecx(SB) MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB) MOVL DX, runtime·cpuid_edx(SB)
......
...@@ -32,8 +32,19 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0 ...@@ -32,8 +32,19 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
CPUID CPUID
CMPQ AX, $0 CMPQ AX, $0
JE nocpuinfo JE nocpuinfo
CMPL BX, $0x756E6547 // "Genu"
JNE notintel
CMPL DX, $0x49656E69 // "ineI"
JNE notintel
CMPL CX, $0x6C65746E // "ntel"
JNE notintel
MOVB $1, runtime·isIntel(SB)
notintel:
MOVQ $1, AX MOVQ $1, AX
CPUID CPUID
MOVL AX, runtime·cpuid_eax(SB)
MOVL CX, runtime·cpuid_ecx(SB) MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB) MOVL DX, runtime·cpuid_edx(SB)
nocpuinfo: nocpuinfo:
......
...@@ -4,72 +4,17 @@ ...@@ -4,72 +4,17 @@
package runtime package runtime
var vendorStringBytes [12]byte var useAVXmemmove bool
var maxInputValue uint32
var featureFlags uint32
var processorVersionInfo uint32
var useRepMovs = true
func hasFeature(feature uint32) bool {
return (featureFlags & feature) != 0
}
func cpuid_low(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) // implemented in cpuidlow_amd64.s
func xgetbv_low(arg1 uint32) (eax, edx uint32) // implemented in cpuidlow_amd64.s
func init() { func init() {
const cfOSXSAVE uint32 = 1 << 27
const cfAVX uint32 = 1 << 28
leaf0()
leaf1()
enabledAVX := false
// Let's check if OS has set CR4.OSXSAVE[bit 18]
// to enable XGETBV instruction.
if hasFeature(cfOSXSAVE) {
eax, _ := xgetbv_low(0)
// Let's check that XCR0[2:1] = ‘11b’
// i.e. XMM state and YMM state are enabled by OS.
enabledAVX = (eax & 0x6) == 0x6
}
isIntelBridgeFamily := (processorVersionInfo == 0x206A0 ||
processorVersionInfo == 0x206D0 ||
processorVersionInfo == 0x306A0 ||
processorVersionInfo == 0x306E0) &&
isIntel()
useRepMovs = !(hasFeature(cfAVX) && enabledAVX) || isIntelBridgeFamily
}
func leaf0() {
eax, ebx, ecx, edx := cpuid_low(0, 0)
maxInputValue = eax
int32ToBytes(ebx, vendorStringBytes[0:4])
int32ToBytes(edx, vendorStringBytes[4:8])
int32ToBytes(ecx, vendorStringBytes[8:12])
}
func leaf1() {
if maxInputValue < 1 {
return
}
eax, _, ecx, _ := cpuid_low(1, 0)
// Let's remove stepping and reserved fields // Let's remove stepping and reserved fields
processorVersionInfo = eax & 0x0FFF3FF0 processorVersionInfo := cpuid_eax & 0x0FFF3FF0
featureFlags = ecx
}
func int32ToBytes(arg uint32, buffer []byte) { isIntelBridgeFamily := isIntel &&
buffer[3] = byte(arg >> 24) (processorVersionInfo == 0x206A0 ||
buffer[2] = byte(arg >> 16) processorVersionInfo == 0x206D0 ||
buffer[1] = byte(arg >> 8) processorVersionInfo == 0x306A0 ||
buffer[0] = byte(arg) processorVersionInfo == 0x306E0)
}
func isIntel() bool { useAVXmemmove = support_avx && !isIntelBridgeFamily
intelSignature := [12]byte{'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l'}
return vendorStringBytes == intelSignature
} }
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// func cpuid_low(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32)
//
// cpuid_low executes the CPUID instruction with arg1 in AX and arg2 in CX
// and returns the resulting AX, BX, CX and DX register values.
// On x86, CPUID takes its leaf number in EAX and its sub-leaf in ECX.
//
// The frame spec $0-24 declares no local frame and 24 bytes of
// arguments plus results: two 4-byte arguments at 0(FP) and 4(FP), and
// four 4-byte results at 8(FP) through 20(FP).
// NOTE(review): the literal flag 4 is presumably NOSPLIT from textflag.h — confirm.
TEXT ·cpuid_low(SB), 4, $0-24
// Load the CPUID inputs.
MOVL arg1+0(FP), AX
MOVL arg2+4(FP), CX
CPUID
// Store the four output registers into the result slots.
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv_low(arg1 uint32) (eax, edx uint32)
//
// xgetbv_low executes XGETBV with arg1 in CX and returns the resulting
// AX and DX register values. On x86, XGETBV reads the extended control
// register selected by ECX into EDX:EAX.
//
// The frame spec $0-16 declares no local frame and 16 bytes of
// arguments plus results: the argument at 0(FP) and the results at
// 8(FP) and 12(FP).
// NOTE(review): the literal flag 4 is presumably NOSPLIT from textflag.h — confirm.
TEXT ·xgetbv_low(SB), 4, $0-16
// Select the register to read.
MOVL arg1+0(FP), CX
// XGETBV, emitted as raw opcode bytes (0F 01 D0).
// NOTE(review): presumably hand-encoded because the assembler lacked the mnemonic — confirm.
BYTE $0x0F; BYTE $0x01; BYTE $0xD0
// Store the low and high halves of the result.
MOVL AX,eax+8(FP)
MOVL DX,edx+12(FP)
RET
...@@ -64,8 +64,8 @@ tail: ...@@ -64,8 +64,8 @@ tail:
JBE move_129through256 JBE move_129through256
// TODO: use branch table and BSR to make this just a single dispatch // TODO: use branch table and BSR to make this just a single dispatch
TESTB $1, runtime·useRepMovs(SB) TESTB $1, runtime·useAVXmemmove(SB)
JZ avxUnaligned JNZ avxUnaligned
/* /*
* check and set for backwards * check and set for backwards
......
...@@ -727,10 +727,12 @@ var ( ...@@ -727,10 +727,12 @@ var (
newprocs int32 newprocs int32
// Information about what cpu features are available. // Information about what cpu features are available.
// Set on startup in asm_{x86,amd64}.s. // Set on startup in asm_{386,amd64,amd64p32}.s.
cpuid_eax uint32
cpuid_ecx uint32 cpuid_ecx uint32
cpuid_edx uint32 cpuid_edx uint32
cpuid_ebx7 uint32 cpuid_ebx7 uint32 // not set on amd64p32
isIntel bool
lfenceBeforeRdtsc bool lfenceBeforeRdtsc bool
support_avx bool support_avx bool
support_avx2 bool support_avx2 bool
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment