Commit a0da2d24 authored by Keith Randall

[dev.ssa] cmd/compile: Use ADD instead of LEA when we can

If the output register is one of the input registers,
we can use a real add instead of LEA.

Change-Id: Ide58f1536afb077c0b939d3a8c7555807fd1c5e3
Reviewed-on: https://go-review.googlesource.com/19234
Reviewed-by: Alexandru Moșoi <alexandru@mosoi.ro>
parent 05434475
...@@ -3689,31 +3689,41 @@ func opregreg(op int, dest, src int16) *obj.Prog { ...@@ -3689,31 +3689,41 @@ func opregreg(op int, dest, src int16) *obj.Prog {
func (s *genState) genValue(v *ssa.Value) { func (s *genState) genValue(v *ssa.Value) {
lineno = v.Line lineno = v.Line
switch v.Op { switch v.Op {
case ssa.OpAMD64ADDQ: case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL, ssa.OpAMD64ADDW:
// TODO: use addq instead of leaq if target is in the right register. r := regnum(v)
p := Prog(x86.ALEAQ) r1 := regnum(v.Args[0])
p.From.Type = obj.TYPE_MEM r2 := regnum(v.Args[1])
p.From.Reg = regnum(v.Args[0]) switch {
p.From.Scale = 1 case r == r1:
p.From.Index = regnum(v.Args[1]) p := Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.To.Reg = regnum(v) p.From.Reg = r2
case ssa.OpAMD64ADDL: p.To.Type = obj.TYPE_REG
p := Prog(x86.ALEAL) p.To.Reg = r
p.From.Type = obj.TYPE_MEM case r == r2:
p.From.Reg = regnum(v.Args[0]) p := Prog(v.Op.Asm())
p.From.Scale = 1 p.From.Type = obj.TYPE_REG
p.From.Index = regnum(v.Args[1]) p.From.Reg = r1
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = regnum(v) p.To.Reg = r
case ssa.OpAMD64ADDW: default:
p := Prog(x86.ALEAW) var asm int
p.From.Type = obj.TYPE_MEM switch v.Op {
p.From.Reg = regnum(v.Args[0]) case ssa.OpAMD64ADDQ:
p.From.Scale = 1 asm = x86.ALEAQ
p.From.Index = regnum(v.Args[1]) case ssa.OpAMD64ADDL:
p.To.Type = obj.TYPE_REG asm = x86.ALEAL
p.To.Reg = regnum(v) case ssa.OpAMD64ADDW:
asm = x86.ALEAW
}
p := Prog(asm)
p.From.Type = obj.TYPE_MEM
p.From.Reg = r1
p.From.Scale = 1
p.From.Index = r2
p.To.Type = obj.TYPE_REG
p.To.Reg = r
}
// 2-address opcode arithmetic, symmetric // 2-address opcode arithmetic, symmetric
case ssa.OpAMD64ADDB, ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, case ssa.OpAMD64ADDB, ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD,
ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ANDW, ssa.OpAMD64ANDB, ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ANDW, ssa.OpAMD64ANDB,
...@@ -3903,7 +3913,16 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3903,7 +3913,16 @@ func (s *genState) genValue(v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = r p.To.Reg = r
case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst, ssa.OpAMD64ADDWconst: case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst, ssa.OpAMD64ADDWconst:
// TODO: use addq instead of leaq if target is in the right register. r := regnum(v)
a := regnum(v.Args[0])
if r == a {
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG
p.To.Reg = r
return
}
var asm int var asm int
switch v.Op { switch v.Op {
case ssa.OpAMD64ADDQconst: case ssa.OpAMD64ADDQconst:
...@@ -3915,10 +3934,10 @@ func (s *genState) genValue(v *ssa.Value) { ...@@ -3915,10 +3934,10 @@ func (s *genState) genValue(v *ssa.Value) {
} }
p := Prog(asm) p := Prog(asm)
p.From.Type = obj.TYPE_MEM p.From.Type = obj.TYPE_MEM
p.From.Reg = regnum(v.Args[0]) p.From.Reg = a
p.From.Offset = v.AuxInt p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = regnum(v) p.To.Reg = r
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst: case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst:
r := regnum(v) r := regnum(v)
x := regnum(v.Args[0]) x := regnum(v.Args[0])
......
...@@ -21,7 +21,6 @@ Optimizations (better compiled code) ...@@ -21,7 +21,6 @@ Optimizations (better compiled code)
- Add a value range propagation pass (for bounds elim & bitwidth reduction) - Add a value range propagation pass (for bounds elim & bitwidth reduction)
- Make dead store pass inter-block - Make dead store pass inter-block
- (x86) More combining address arithmetic into loads/stores - (x86) More combining address arithmetic into loads/stores
- (x86) use ADDQ instead of LEAQ when we can
- redundant CMP in sequences like this: - redundant CMP in sequences like this:
SUBQ $8, AX SUBQ $8, AX
CMP AX, $0 CMP AX, $0
...@@ -37,7 +36,6 @@ Optimizations (better compiled code) ...@@ -37,7 +36,6 @@ Optimizations (better compiled code)
Same for interfaces? Same for interfaces?
- boolean logic: movb/xorb$1/testb/jeq -> movb/testb/jne - boolean logic: movb/xorb$1/testb/jeq -> movb/testb/jne
- (ADDQconst (SUBQconst x)) and vice-versa - (ADDQconst (SUBQconst x)) and vice-versa
- combine LEAQs
- store followed by load to same address - store followed by load to same address
- (CMPconst [0] (AND x y)) -> (TEST x y) - (CMPconst [0] (AND x y)) -> (TEST x y)
- more (LOAD (ADDQ )) -> LOADIDX - more (LOAD (ADDQ )) -> LOADIDX
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment