Commit c069bc49 authored by Keith Randall

[dev.ssa] cmd/compile: implement GO386=387

Last part of the 386 SSA port.

Modify the x86 backend to simulate SSE registers and
instructions with 387 registers and instructions.
The simulation isn't terribly performant, but it works,
and the old implementation wasn't very performant either.
Leaving to people who care about 387 to optimize if they want.

Turn on SSA backend for 386 by default.

Fixes #16358

Change-Id: I678fb59132620b2c47e993c1c10c4c21135f70c0
Reviewed-on: https://go-review.googlesource.com/25271
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 77ef597f
...@@ -26,6 +26,9 @@ func initssa() *ssa.Config { ...@@ -26,6 +26,9 @@ func initssa() *ssa.Config {
ssaExp.mustImplement = true ssaExp.mustImplement = true
if ssaConfig == nil { if ssaConfig == nil {
ssaConfig = ssa.NewConfig(Thearch.LinkArch.Name, &ssaExp, Ctxt, Debug['N'] == 0) ssaConfig = ssa.NewConfig(Thearch.LinkArch.Name, &ssaExp, Ctxt, Debug['N'] == 0)
if Thearch.LinkArch.Name == "386" {
ssaConfig.Set387(Thearch.Use387)
}
} }
return ssaConfig return ssaConfig
} }
...@@ -37,7 +40,7 @@ func shouldssa(fn *Node) bool { ...@@ -37,7 +40,7 @@ func shouldssa(fn *Node) bool {
if os.Getenv("SSATEST") == "" { if os.Getenv("SSATEST") == "" {
return false return false
} }
case "amd64", "amd64p32", "arm": case "amd64", "amd64p32", "arm", "386":
// Generally available. // Generally available.
} }
if !ssaEnabled { if !ssaEnabled {
...@@ -3948,6 +3951,10 @@ type SSAGenState struct { ...@@ -3948,6 +3951,10 @@ type SSAGenState struct {
// bstart remembers where each block starts (indexed by block ID) // bstart remembers where each block starts (indexed by block ID)
bstart []*obj.Prog bstart []*obj.Prog
// 387 port: maps from SSE registers (REG_X?) to 387 registers (REG_F?)
SSEto387 map[int16]int16
Scratch387 *Node
} }
// Pc returns the current Prog. // Pc returns the current Prog.
...@@ -3984,6 +3991,11 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) { ...@@ -3984,6 +3991,11 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
blockProgs[Pc] = f.Blocks[0] blockProgs[Pc] = f.Blocks[0]
} }
if Thearch.Use387 {
s.SSEto387 = map[int16]int16{}
s.Scratch387 = temp(Types[TUINT64])
}
// Emit basic blocks // Emit basic blocks
for i, b := range f.Blocks { for i, b := range f.Blocks {
s.bstart[b.ID] = Pc s.bstart[b.ID] = Pc
......
...@@ -30,6 +30,7 @@ type Config struct { ...@@ -30,6 +30,7 @@ type Config struct {
optimize bool // Do optimization optimize bool // Do optimization
noDuffDevice bool // Don't use Duff's device noDuffDevice bool // Don't use Duff's device
nacl bool // GOOS=nacl nacl bool // GOOS=nacl
use387 bool // GO386=387
sparsePhiCutoff uint64 // Sparse phi location algorithm used above this #blocks*#variables score sparsePhiCutoff uint64 // Sparse phi location algorithm used above this #blocks*#variables score
curFunc *Func curFunc *Func
...@@ -243,6 +244,10 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config ...@@ -243,6 +244,10 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
return c return c
} }
// Set387 stores b in c.use387, the Config flag corresponding to GO386=387.
func (c *Config) Set387(b bool) {
c.use387 = b
}
func (c *Config) Frontend() Frontend { return c.fe } func (c *Config) Frontend() Frontend { return c.fe }
func (c *Config) SparsePhiCutoff() uint64 { return c.sparsePhiCutoff } func (c *Config) SparsePhiCutoff() uint64 { return c.sparsePhiCutoff }
......
...@@ -49,6 +49,17 @@ var regNames386 = []string{ ...@@ -49,6 +49,17 @@ var regNames386 = []string{
"SB", "SB",
} }
// Notes on 387 support.
// - The 387 has a weird stack-register setup for floating-point registers.
// We use these registers when SSE registers are not available (when GO386=387).
// - We use the same register names (X0-X7) but they refer to the 387
// floating-point registers. That way, most of the SSA backend is unchanged.
// - The instruction generation pass maintains an SSE->387 register mapping.
// This mapping is updated whenever the FP stack is pushed or popped so that
// we can always find a given SSE register even when the TOS pointer has changed.
// - To facilitate the mapping from SSE to 387, we enforce that
// every basic block starts and ends with an empty floating-point stack.
func init() { func init() {
// Make map from reg names to reg integers. // Make map from reg names to reg integers.
if len(regNames386) > 64 { if len(regNames386) > 64 {
......
...@@ -507,6 +507,9 @@ func (s *regAllocState) init(f *Func) { ...@@ -507,6 +507,9 @@ func (s *regAllocState) init(f *Func) {
s.allocatable &^= 1 << 15 // R15 - reserved for nacl s.allocatable &^= 1 << 15 // R15 - reserved for nacl
} }
} }
if s.f.Config.use387 {
s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go)
}
s.regs = make([]regState, s.numRegs) s.regs = make([]regState, s.numRegs)
s.values = make([]valState, f.NumValues()) s.values = make([]valState, f.NumValues())
...@@ -834,6 +837,9 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -834,6 +837,9 @@ func (s *regAllocState) regalloc(f *Func) {
if phiRegs[i] != noRegister { if phiRegs[i] != noRegister {
continue continue
} }
if s.f.Config.use387 && v.Type.IsFloat() {
continue // 387 can't handle floats in registers between blocks
}
m := s.compatRegs(v.Type) &^ phiUsed &^ s.used m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
if m != 0 { if m != 0 {
r := pickReg(m) r := pickReg(m)
...@@ -1300,6 +1306,11 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -1300,6 +1306,11 @@ func (s *regAllocState) regalloc(f *Func) {
s.freeUseRecords = u s.freeUseRecords = u
} }
// Spill any values that can't live across basic block boundaries.
if s.f.Config.use387 {
s.freeRegs(s.f.Config.fpRegMask)
}
// If we are approaching a merge point and we are the primary // If we are approaching a merge point and we are the primary
// predecessor of it, find live values that we use soon after // predecessor of it, find live values that we use soon after
// the merge point and promote them to registers now. // the merge point and promote them to registers now.
...@@ -1323,6 +1334,9 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -1323,6 +1334,9 @@ func (s *regAllocState) regalloc(f *Func) {
continue continue
} }
v := s.orig[vid] v := s.orig[vid]
if s.f.Config.use387 && v.Type.IsFloat() {
continue // 387 can't handle floats in registers between blocks
}
m := s.compatRegs(v.Type) &^ s.used m := s.compatRegs(v.Type) &^ s.used
if m&^desired.avoid != 0 { if m&^desired.avoid != 0 {
m &^= desired.avoid m &^= desired.avoid
......
This diff is collapsed.
...@@ -139,6 +139,13 @@ func opregreg(op obj.As, dest, src int16) *obj.Prog { ...@@ -139,6 +139,13 @@ func opregreg(op obj.As, dest, src int16) *obj.Prog {
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
s.SetLineno(v.Line) s.SetLineno(v.Line)
if gc.Thearch.Use387 {
if ssaGenValue387(s, v) {
return // v was handled by 387 generation.
}
}
switch v.Op { switch v.Op {
case ssa.Op386ADDL: case ssa.Op386ADDL:
r := gc.SSARegNum(v) r := gc.SSARegNum(v)
...@@ -899,6 +906,11 @@ var nefJumps = [2][2]gc.FloatingEQNEJump{ ...@@ -899,6 +906,11 @@ var nefJumps = [2][2]gc.FloatingEQNEJump{
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
s.SetLineno(b.Line) s.SetLineno(b.Line)
if gc.Thearch.Use387 {
// Empty the 387's FP stack before the block ends.
flush387(s)
}
switch b.Kind { switch b.Kind {
case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck: case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
if b.Succs[0].Block() != next { if b.Succs[0].Block() != next {
......
...@@ -193,9 +193,7 @@ TEXT runtime·asminit(SB),NOSPLIT,$0-0 ...@@ -193,9 +193,7 @@ TEXT runtime·asminit(SB),NOSPLIT,$0-0
// Other operating systems use double precision. // Other operating systems use double precision.
// Change to double precision to match them, // Change to double precision to match them,
// and to match other hardware that only has double. // and to match other hardware that only has double.
PUSHL $0x27F FLDCW runtime·controlWord64(SB)
FLDCW 0(SP)
POPL AX
RET RET
/* /*
...@@ -1638,47 +1636,20 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 ...@@ -1638,47 +1636,20 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
MOVL AX, runtime·lastmoduledatap(SB) MOVL AX, runtime·lastmoduledatap(SB)
RET RET
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$0-12 TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
// TODO: condition on GO386 env var.
MOVL a+0(FP), AX MOVL a+0(FP), AX
MOVL AX, 0(SP)
// Check size. MOVL $0, 4(SP)
CMPL AX, $0x80000000 FMOVV 0(SP), F0
JAE large FMOVDP F0, ret+4(FP)
// Less than 2**31, convert directly.
CVTSL2SD AX, X0
MOVSD X0, ret+4(FP)
RET
large:
// >= 2**31. Subtract 2**31 (uint32), convert, then add 2**31 (float64).
SUBL $0x80000000, AX
CVTSL2SD AX, X0
ADDSD twotothe31<>(SB), X0
MOVSD X0, ret+4(FP)
RET RET
TEXT runtime·float64touint32(SB),NOSPLIT,$0-12 TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
// TODO: condition on GO386 env var. FMOVD a+0(FP), F0
MOVSD a+0(FP), X0 FSTCW 0(SP)
FLDCW runtime·controlWord64trunc(SB)
// Check size. FMOVVP F0, 4(SP)
MOVSD twotothe31<>(SB), X1 FLDCW 0(SP)
UCOMISD X1, X0 //note: args swapped relative to CMPL MOVL 4(SP), AX
JAE large
// Less than 2**31, convert directly.
CVTTSD2SL X0, AX
MOVL AX, ret+8(FP)
RET
large:
// >= 2**31. Subtract 2**31 (float64), convert, then add 2**31 (uint32).
SUBSD X1, X0
CVTTSD2SL X0, AX
ADDL $0x80000000, AX
MOVL AX, ret+8(FP) MOVL AX, ret+8(FP)
RET RET
// 2**31 as a float64.
DATA twotothe31<>+0x00(SB)/8, $0x41e0000000000000
GLOBL twotothe31<>(SB),RODATA,$8
...@@ -255,3 +255,17 @@ func slowdodiv(n, d uint64) (q, r uint64) { ...@@ -255,3 +255,17 @@ func slowdodiv(n, d uint64) (q, r uint64) {
} }
return q, n return q, n
} }
// Floating point control word values for GOARCH=386 GO386=387.
// Bits 0-5 are bits to disable floating-point exceptions.
// Bits 8-9 are the precision control:
//   0 = single precision a.k.a. float32
//   2 = double precision a.k.a. float64
// Bits 10-11 are the rounding mode:
//   0 = round to nearest (even on a tie)
//   3 = round toward zero
//
// Every value is declared as uint16 explicitly. The x87 control word is
// 16 bits wide and FLDCW loads a 16-bit memory operand; a variable left
// untyped here would default to int and be wider than the operand.
var (
	controlWord64      uint16 = 0x3f + 2<<8 + 0<<10 // double precision, round to nearest
	controlWord32      uint16 = 0x3f + 0<<8 + 0<<10 // single precision, round to nearest
	controlWord64trunc uint16 = 0x3f + 2<<8 + 3<<10 // double precision, round toward zero
)
// +build !amd64,!arm,!amd64p32 // +build !amd64,!arm,!amd64p32,!386
// errorcheck -0 -l -live -wb=0 // errorcheck -0 -l -live -wb=0
// Copyright 2014 The Go Authors. All rights reserved. // Copyright 2014 The Go Authors. All rights reserved.
......
// +build amd64 arm amd64p32 // +build amd64 arm amd64p32 386
// errorcheck -0 -l -live -wb=0 // errorcheck -0 -l -live -wb=0
// Copyright 2014 The Go Authors. All rights reserved. // Copyright 2014 The Go Authors. All rights reserved.
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Fails on ppc64x because of incomplete optimization. // Fails on ppc64x because of incomplete optimization.
// See issues 9058. // See issues 9058.
// Same reason for mips64x and s390x. // Same reason for mips64x and s390x.
// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32 // +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32,!386
// Copyright 2013 The Go Authors. All rights reserved. // Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
......
// errorcheck -0 -d=nil // errorcheck -0 -d=nil
// +build amd64 arm amd64p32 // +build amd64 arm amd64p32 386
// Copyright 2013 The Go Authors. All rights reserved. // Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
......
// +build !amd64,!arm,!amd64p32 // +build !amd64,!arm,!amd64p32,!386
// errorcheck -0 -d=append,slice // errorcheck -0 -d=append,slice
// Copyright 2015 The Go Authors. All rights reserved. // Copyright 2015 The Go Authors. All rights reserved.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment