Commit b64e8178 authored by Martin Möhrmann's avatar Martin Möhrmann

runtime: simplify detection of preference to use AVX memmove

Reduces cmd/go by 4464 bytes on amd64.

Removes the duplicate detection of AVX support and
presence of Intel processors.

Change-Id: I4670189951a63760fae217708f68d65e94a30dc5
Reviewed-on: https://go-review.googlesource.com/41570Reviewed-by: default avatarKeith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 16271b8b
......@@ -67,14 +67,16 @@ has_cpuid:
JNE notintel
CMPL CX, $0x6C65746E // "ntel"
JNE notintel
MOVB $1, runtime·isIntel(SB)
MOVB $1, runtime·lfenceBeforeRdtsc(SB)
notintel:
// Load EAX=1 cpuid flags
MOVL $1, AX
CPUID
MOVL CX, AX // Move to global variable clobbers CX when generating PIC
MOVL AX, runtime·cpuid_ecx(SB)
MOVL CX, DI // Move to global variable clobbers CX when generating PIC
MOVL AX, runtime·cpuid_eax(SB)
MOVL DI, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB)
// Check for MMX support
......
......@@ -41,12 +41,14 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
JNE notintel
CMPL CX, $0x6C65746E // "ntel"
JNE notintel
MOVB $1, runtime·isIntel(SB)
MOVB $1, runtime·lfenceBeforeRdtsc(SB)
notintel:
// Load EAX=1 cpuid flags
MOVQ $1, AX
CPUID
MOVL AX, runtime·cpuid_eax(SB)
MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB)
......
......@@ -32,8 +32,19 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
CPUID
CMPQ AX, $0
JE nocpuinfo
CMPL BX, $0x756E6547 // "Genu"
JNE notintel
CMPL DX, $0x49656E69 // "ineI"
JNE notintel
CMPL CX, $0x6C65746E // "ntel"
JNE notintel
MOVB $1, runtime·isIntel(SB)
notintel:
MOVQ $1, AX
CPUID
MOVL AX, runtime·cpuid_eax(SB)
MOVL CX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB)
nocpuinfo:
......
......@@ -4,72 +4,17 @@
package runtime
var vendorStringBytes [12]byte
var maxInputValue uint32
var featureFlags uint32
var processorVersionInfo uint32
var useRepMovs = true
func hasFeature(feature uint32) bool {
return (featureFlags & feature) != 0
}
func cpuid_low(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) // implemented in cpuidlow_amd64.s
func xgetbv_low(arg1 uint32) (eax, edx uint32) // implemented in cpuidlow_amd64.s
var useAVXmemmove bool
func init() {
const cfOSXSAVE uint32 = 1 << 27
const cfAVX uint32 = 1 << 28
leaf0()
leaf1()
enabledAVX := false
// Let's check if OS has set CR4.OSXSAVE[bit 18]
// to enable XGETBV instruction.
if hasFeature(cfOSXSAVE) {
eax, _ := xgetbv_low(0)
// Let's check that XCR0[2:1] = ‘11b’
// i.e. XMM state and YMM state are enabled by OS.
enabledAVX = (eax & 0x6) == 0x6
}
// Let's remove stepping and reserved fields
processorVersionInfo := cpuid_eax & 0x0FFF3FF0
isIntelBridgeFamily := (processorVersionInfo == 0x206A0 ||
isIntelBridgeFamily := isIntel &&
(processorVersionInfo == 0x206A0 ||
processorVersionInfo == 0x206D0 ||
processorVersionInfo == 0x306A0 ||
processorVersionInfo == 0x306E0) &&
isIntel()
useRepMovs = !(hasFeature(cfAVX) && enabledAVX) || isIntelBridgeFamily
}
func leaf0() {
eax, ebx, ecx, edx := cpuid_low(0, 0)
maxInputValue = eax
int32ToBytes(ebx, vendorStringBytes[0:4])
int32ToBytes(edx, vendorStringBytes[4:8])
int32ToBytes(ecx, vendorStringBytes[8:12])
}
func leaf1() {
if maxInputValue < 1 {
return
}
eax, _, ecx, _ := cpuid_low(1, 0)
// Let's remove stepping and reserved fields
processorVersionInfo = eax & 0x0FFF3FF0
featureFlags = ecx
}
func int32ToBytes(arg uint32, buffer []byte) {
buffer[3] = byte(arg >> 24)
buffer[2] = byte(arg >> 16)
buffer[1] = byte(arg >> 8)
buffer[0] = byte(arg)
}
processorVersionInfo == 0x306E0)
func isIntel() bool {
intelSignature := [12]byte{'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l'}
return vendorStringBytes == intelSignature
useAVXmemmove = support_avx && !isIntelBridgeFamily
}
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// func cpuid_low(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuid_low(SB), 4, $0-24
MOVL arg1+0(FP), AX
MOVL arg2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv_low(arg1 uint32) (eax, edx uint32)
TEXT ·xgetbv_low(SB), 4, $0-16
MOVL arg1+0(FP), CX
// XGETBV
BYTE $0x0F; BYTE $0x01; BYTE $0xD0
MOVL AX,eax+8(FP)
MOVL DX,edx+12(FP)
RET
......@@ -64,8 +64,8 @@ tail:
JBE move_129through256
// TODO: use branch table and BSR to make this just a single dispatch
TESTB $1, runtime·useRepMovs(SB)
JZ avxUnaligned
TESTB $1, runtime·useAVXmemmove(SB)
JNZ avxUnaligned
/*
* check and set for backwards
......
......@@ -727,10 +727,12 @@ var (
newprocs int32
// Information about what cpu features are available.
// Set on startup in asm_{x86,amd64}.s.
// Set on startup in asm_{386,amd64,amd64p32}.s.
cpuid_eax uint32
cpuid_ecx uint32
cpuid_edx uint32
cpuid_ebx7 uint32
cpuid_ebx7 uint32 // not set on amd64p32
isIntel bool
lfenceBeforeRdtsc bool
support_avx bool
support_avx2 bool
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment