Commit 90065eab authored by Keith Randall's avatar Keith Randall

[dev.ssa] cmd/compile: use wider move instruction for floats

Distinguish move/load/store ops.  Unify some of this code a bit.

Reduces Mandelbrot slowdown with SSA from 58% to 12%.

Change-Id: I3276eaebcbcdd9de3f8299c79b5f25c0429194c4
Reviewed-on: https://go-review.googlesource.com/18677
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: default avatarDavid Chase <drchase@google.com>
parent da8af477
...@@ -3503,7 +3503,7 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3503,7 +3503,7 @@ func (s *genState) genValue(v *ssa.Value) {
x := regnum(v.Args[0]) x := regnum(v.Args[0])
y := regnum(v.Args[1]) y := regnum(v.Args[1])
if x != r && y != r { if x != r && y != r {
opregreg(regMoveByTypeAMD64(v.Type), r, x) opregreg(moveByType(v.Type), r, x)
x = r x = r
} }
p := Prog(v.Op.Asm()) p := Prog(v.Op.Asm())
...@@ -3527,7 +3527,7 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3527,7 +3527,7 @@ func (s *genState) genValue(v *ssa.Value) {
neg = true neg = true
} }
if x != r { if x != r {
opregreg(regMoveByTypeAMD64(v.Type), r, x) opregreg(moveByType(v.Type), r, x)
} }
opregreg(v.Op.Asm(), r, y) opregreg(v.Op.Asm(), r, y)
...@@ -3547,11 +3547,11 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3547,11 +3547,11 @@ func (s *genState) genValue(v *ssa.Value) {
// register move y to x15 // register move y to x15
// register move x to y // register move x to y
// rename y with x15 // rename y with x15
opregreg(regMoveByTypeAMD64(v.Type), x15, y) opregreg(moveByType(v.Type), x15, y)
opregreg(regMoveByTypeAMD64(v.Type), r, x) opregreg(moveByType(v.Type), r, x)
y = x15 y = x15
} else if x != r { } else if x != r {
opregreg(regMoveByTypeAMD64(v.Type), r, x) opregreg(moveByType(v.Type), r, x)
} }
opregreg(v.Op.Asm(), r, y) opregreg(v.Op.Asm(), r, y)
...@@ -3669,7 +3669,7 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3669,7 +3669,7 @@ func (s *genState) genValue(v *ssa.Value) {
if r == x86.REG_CX { if r == x86.REG_CX {
v.Fatalf("can't implement %s, target and shift both in CX", v.LongString()) v.Fatalf("can't implement %s, target and shift both in CX", v.LongString())
} }
p := Prog(regMoveAMD64(v.Type.Size())) p := Prog(moveByType(v.Type))
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = x p.From.Reg = x
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
...@@ -3701,7 +3701,7 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3701,7 +3701,7 @@ func (s *genState) genValue(v *ssa.Value) {
r := regnum(v) r := regnum(v)
x := regnum(v.Args[0]) x := regnum(v.Args[0])
if r != x { if r != x {
p := Prog(regMoveAMD64(v.Type.Size())) p := Prog(moveByType(v.Type))
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = x p.From.Reg = x
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
...@@ -3731,7 +3731,7 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3731,7 +3731,7 @@ func (s *genState) genValue(v *ssa.Value) {
x := regnum(v.Args[0]) x := regnum(v.Args[0])
r := regnum(v) r := regnum(v)
if x != r { if x != r {
p := Prog(regMoveAMD64(v.Type.Size())) p := Prog(moveByType(v.Type))
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = x p.From.Reg = x
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
...@@ -3910,14 +3910,14 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3910,14 +3910,14 @@ func (s *genState) genValue(v *ssa.Value) {
x := regnum(v.Args[0]) x := regnum(v.Args[0])
y := regnum(v) y := regnum(v)
if x != y { if x != y {
opregreg(regMoveByTypeAMD64(v.Type), y, x) opregreg(moveByType(v.Type), y, x)
} }
case ssa.OpLoadReg: case ssa.OpLoadReg:
if v.Type.IsFlags() { if v.Type.IsFlags() {
v.Unimplementedf("load flags not implemented: %v", v.LongString()) v.Unimplementedf("load flags not implemented: %v", v.LongString())
return return
} }
p := Prog(movSizeByType(v.Type)) p := Prog(loadByType(v.Type))
n, off := autoVar(v.Args[0]) n, off := autoVar(v.Args[0])
p.From.Type = obj.TYPE_MEM p.From.Type = obj.TYPE_MEM
p.From.Node = n p.From.Node = n
...@@ -3937,7 +3937,7 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3937,7 +3937,7 @@ func (s *genState) genValue(v *ssa.Value) {
v.Unimplementedf("store flags not implemented: %v", v.LongString()) v.Unimplementedf("store flags not implemented: %v", v.LongString())
return return
} }
p := Prog(movSizeByType(v.Type)) p := Prog(storeByType(v.Type))
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[0]) p.From.Reg = regnum(v.Args[0])
n, off := autoVar(v) n, off := autoVar(v)
...@@ -4060,7 +4060,7 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -4060,7 +4060,7 @@ func (s *genState) genValue(v *ssa.Value) {
x := regnum(v.Args[0]) x := regnum(v.Args[0])
r := regnum(v) r := regnum(v)
if x != r { if x != r {
p := Prog(regMoveAMD64(v.Type.Size())) p := Prog(moveByType(v.Type))
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = x p.From.Reg = x
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
...@@ -4170,14 +4170,6 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -4170,14 +4170,6 @@ func (s *genState) genValue(v *ssa.Value) {
} }
} }
// movSizeByType returns the MOV instruction of the given type.
func movSizeByType(t ssa.Type) (asm int) {
// For x86, there's no difference between reg move opcodes
// and memory move opcodes.
asm = regMoveByTypeAMD64(t)
return
}
// movZero generates a register indirect move with a 0 immediate and keeps track of bytes left and next offset // movZero generates a register indirect move with a 0 immediate and keeps track of bytes left and next offset
func movZero(as int, width int64, nbytes int64, offset int64, regnum int16) (nleft int64, noff int64) { func movZero(as int, width int64, nbytes int64, offset int64, regnum int16) (nleft int64, noff int64) {
p := Prog(as) p := Prog(as)
...@@ -4477,9 +4469,23 @@ var ssaRegToReg = [...]int16{ ...@@ -4477,9 +4469,23 @@ var ssaRegToReg = [...]int16{
// TODO: arch-dependent // TODO: arch-dependent
} }
// regMoveAMD64 returns the register->register move opcode for the given width. // loadByType returns the load instruction of the given type.
// TODO: generalize for all architectures? func loadByType(t ssa.Type) int {
func regMoveAMD64(width int64) int { // For x86, there's no difference between load and store opcodes.
return storeByType(t)
}
// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) int {
width := t.Size()
if t.IsFloat() {
switch width {
case 4:
return x86.AMOVSS
case 8:
return x86.AMOVSD
}
} else {
switch width { switch width {
case 1: case 1:
return x86.AMOVB return x86.AMOVB
...@@ -4489,24 +4495,19 @@ func regMoveAMD64(width int64) int { ...@@ -4489,24 +4495,19 @@ func regMoveAMD64(width int64) int {
return x86.AMOVL return x86.AMOVL
case 8: case 8:
return x86.AMOVQ return x86.AMOVQ
default:
panic("bad int register width")
} }
}
panic("bad store type")
} }
func regMoveByTypeAMD64(t ssa.Type) int { // moveByType returns the reg->reg move instruction of the given type.
width := t.Size() func moveByType(t ssa.Type) int {
if t.IsFloat() { if t.IsFloat() {
switch width { // Moving the whole sse2 register is faster
case 4: // than moving just the correct low portion of it.
return x86.AMOVSS return x86.AMOVAPD
case 8:
return x86.AMOVSD
default:
panic("bad float register width")
}
} else { } else {
switch width { switch t.Size() {
case 1: case 1:
return x86.AMOVB return x86.AMOVB
case 2: case 2:
...@@ -4519,7 +4520,6 @@ func regMoveByTypeAMD64(t ssa.Type) int { ...@@ -4519,7 +4520,6 @@ func regMoveByTypeAMD64(t ssa.Type) int {
panic("bad int register width") panic("bad int register width")
} }
} }
panic("bad register type") panic("bad register type")
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment