Commit a5868a47 authored by Matthew Dempsky

cmd/internal/obj/x86: move MOV->XOR rewriting into compiler

Fixes #20986.

Change-Id: Ic3cf5c0ab260f259ecff7b92cfdf5f4ae432aef3
Reviewed-on: https://go-review.googlesource.com/73072
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent 73f1a1a1
...@@ -494,6 +494,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -494,6 +494,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Args[0].Reg() p.To.Reg = v.Args[0].Reg()
case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst: case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
x := v.Reg() x := v.Reg()
// If flags aren't live (indicated by v.Aux == nil),
// then we can rewrite MOV $0, AX into XOR AX, AX.
if v.AuxInt == 0 && v.Aux == nil {
p := s.Prog(x86.AXORL)
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = x
break
}
asm := v.Op.Asm() asm := v.Op.Asm()
// Use MOVL to move a small constant into a register // Use MOVL to move a small constant into a register
// when the constant is positive and fits into 32 bits. // when the constant is positive and fits into 32 bits.
...@@ -506,11 +518,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -506,11 +518,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Offset = v.AuxInt p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = x p.To.Reg = x
// If flags are live at this instruction, suppress the
// MOV $0,AX -> XOR AX,AX optimization.
if v.Aux != nil {
p.Mark |= x86.PRESERVEFLAGS
}
case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst: case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
x := v.Reg() x := v.Reg()
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
......
...@@ -426,16 +426,23 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -426,16 +426,23 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Args[0].Reg() p.To.Reg = v.Args[0].Reg()
case ssa.Op386MOVLconst: case ssa.Op386MOVLconst:
x := v.Reg() x := v.Reg()
// If flags aren't live (indicated by v.Aux == nil),
// then we can rewrite MOV $0, AX into XOR AX, AX.
if v.AuxInt == 0 && v.Aux == nil {
p := s.Prog(x86.AXORL)
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = x
break
}
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = x p.To.Reg = x
// If flags are live at this instruction, suppress the
// MOV $0,AX -> XOR AX,AX optimization.
if v.Aux != nil {
p.Mark |= x86.PRESERVEFLAGS
}
case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst: case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
x := v.Reg() x := v.Reg()
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
......
...@@ -36,8 +36,7 @@ import "cmd/internal/obj" ...@@ -36,8 +36,7 @@ import "cmd/internal/obj"
const ( const (
/* mark flags */ /* mark flags */
DONE = 1 << iota DONE = 1 << iota
PRESERVEFLAGS // not allowed to clobber flags
) )
/* /*
......
...@@ -202,7 +202,6 @@ const ( ...@@ -202,7 +202,6 @@ const (
Zm_ilo Zm_ilo
Zib_rr Zib_rr
Zil_rr Zil_rr
Zclr
Zbyte Zbyte
Zvex_rm_v_r Zvex_rm_v_r
Zvex_r_v_rm Zvex_r_v_rm
...@@ -412,7 +411,6 @@ var ybtl = []ytab{ ...@@ -412,7 +411,6 @@ var ybtl = []ytab{
var ymovw = []ytab{ var ymovw = []ytab{
{Zr_m, 1, argList{Yrl, Yml}}, {Zr_m, 1, argList{Yrl, Yml}},
{Zm_r, 1, argList{Yml, Yrl}}, {Zm_r, 1, argList{Yml, Yrl}},
{Zclr, 1, argList{Yi0, Yrl}},
{Zil_rp, 1, argList{Yi32, Yrl}}, {Zil_rp, 1, argList{Yi32, Yrl}},
{Zilo_m, 2, argList{Yi32, Yml}}, {Zilo_m, 2, argList{Yi32, Yml}},
{Zaut_r, 2, argList{Yiauto, Yrl}}, {Zaut_r, 2, argList{Yiauto, Yrl}},
...@@ -421,7 +419,6 @@ var ymovw = []ytab{ ...@@ -421,7 +419,6 @@ var ymovw = []ytab{
var ymovl = []ytab{ var ymovl = []ytab{
{Zr_m, 1, argList{Yrl, Yml}}, {Zr_m, 1, argList{Yrl, Yml}},
{Zm_r, 1, argList{Yml, Yrl}}, {Zm_r, 1, argList{Yml, Yrl}},
{Zclr, 1, argList{Yi0, Yrl}},
{Zil_rp, 1, argList{Yi32, Yrl}}, {Zil_rp, 1, argList{Yi32, Yrl}},
{Zilo_m, 2, argList{Yi32, Yml}}, {Zilo_m, 2, argList{Yi32, Yml}},
{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
...@@ -447,7 +444,6 @@ var ymovq = []ytab{ ...@@ -447,7 +444,6 @@ var ymovq = []ytab{
// valid only in 64-bit mode, usually with 64-bit prefix // valid only in 64-bit mode, usually with 64-bit prefix
{Zr_m, 1, argList{Yrl, Yml}}, // 0x89 {Zr_m, 1, argList{Yrl, Yml}}, // 0x89
{Zm_r, 1, argList{Yml, Yrl}}, // 0x8b {Zm_r, 1, argList{Yml, Yrl}}, // 0x8b
{Zclr, 1, argList{Yi0, Yrl}}, // 0x31
{Zilo_m, 2, argList{Ys32, Yrl}}, // 32 bit signed 0xc7,(0) {Zilo_m, 2, argList{Ys32, Yrl}}, // 32 bit signed 0xc7,(0)
{Ziq_rp, 1, argList{Yi64, Yrl}}, // 0xb8 -- 32/64 bit immediate {Ziq_rp, 1, argList{Yi64, Yrl}}, // 0xb8 -- 32/64 bit immediate
{Zilo_m, 2, argList{Yi32, Yml}}, // 0xc7,(0) {Zilo_m, 2, argList{Yi32, Yml}}, // 0xc7,(0)
...@@ -1217,7 +1213,7 @@ var optab = ...@@ -1217,7 +1213,7 @@ var optab =
{AMOVHLPS, yxr, Pm, [23]uint8{0x12}}, {AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}}, {AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}}, {AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, {AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
{AMOVLHPS, yxr, Pm, [23]uint8{0x16}}, {AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}}, {AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}}, {AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
...@@ -1230,7 +1226,7 @@ var optab = ...@@ -1230,7 +1226,7 @@ var optab =
{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}}, {AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}}, {AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}}, {AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, {AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}}, {AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
{AMOVSB, ynone, Pb, [23]uint8{0xa4}}, {AMOVSB, ynone, Pb, [23]uint8{0xa4}},
{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}}, {AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
...@@ -1240,7 +1236,7 @@ var optab = ...@@ -1240,7 +1236,7 @@ var optab =
{AMOVSW, ynone, Pe, [23]uint8{0xa5}}, {AMOVSW, ynone, Pe, [23]uint8{0xa5}},
{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}}, {AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}}, {AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}}, {AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}}, {AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}}, {AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}}, {AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
...@@ -2405,10 +2401,6 @@ func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { ...@@ -2405,10 +2401,6 @@ func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
v = int64(int32(v)) v = int64(int32(v))
} }
if v == 0 { if v == 0 {
if p.Mark&PRESERVEFLAGS != 0 {
// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
return Yu7
}
return Yi0 return Yi0
} }
if v == 1 { if v == 1 {
...@@ -3857,11 +3849,6 @@ func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { ...@@ -3857,11 +3849,6 @@ func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40) asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
asmbuf.Put1(byte(op + reg[p.From.Reg])) asmbuf.Put1(byte(op + reg[p.From.Reg]))
case Zclr:
asmbuf.rexflag &^= Pw
asmbuf.Put1(byte(op))
asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
case Zcallcon, Zjmpcon: case Zcallcon, Zjmpcon:
if yt.zcase == Zcallcon { if yt.zcase == Zcallcon {
asmbuf.Put1(byte(op)) asmbuf.Put1(byte(op))
......
...@@ -30,8 +30,6 @@ TEXT ·divWW(SB),NOSPLIT,$0 ...@@ -30,8 +30,6 @@ TEXT ·divWW(SB),NOSPLIT,$0
// The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0. // The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0.
// It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared. // It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared.
// This is faster than using rotate instructions. // This is faster than using rotate instructions.
//
// CAUTION: Note that MOVQ $0, Rx is translated to XORQ Rx, Rx which clears the carry bit!
// func addVV(z, x, y []Word) (c Word) // func addVV(z, x, y []Word) (c Word)
TEXT ·addVV(SB),NOSPLIT,$0 TEXT ·addVV(SB),NOSPLIT,$0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment