Commit 34fe8295 authored by Bryan C. Mills

Revert "compile: prefer an AND instead of SHR+SHL instructions"

This reverts CL 194297.

Reason for revert: introduced register allocation failures on PPC64LE builders.

Updates #33826
Updates #32781
Updates #34468

Change-Id: I7d0b55df8cdf8e7d2277f1814299b083c2692e48
Reviewed-on: https://go-review.googlesource.com/c/go/+/196957
Run-TryBot: Bryan C. Mills <bcmills@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
parent 37c033b0
...@@ -1863,8 +1863,9 @@ ...@@ -1863,8 +1863,9 @@
(XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c) (XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
-> (EXTRWconst [32-c] x2 x) -> (EXTRWconst [32-c] x2 x)
// Rewrite special pairs of shifts to AND. // Generic rules rewrite certain AND to a pair of shifts.
// On ARM64 the bitmask can fit into an instruction. // However, on ARM64 the bitmask can fit into an instruction.
// Rewrite it back to AND.
(SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x) // mask out high bits (SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
(SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits (SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits
...@@ -1970,8 +1971,6 @@ ...@@ -1970,8 +1971,6 @@
-> (BFXIL [bfc] y x) -> (BFXIL [bfc] y x)
(ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == getARM64BFwidth(bfc) (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == getARM64BFwidth(bfc)
-> (BFXIL [bfc] y x) -> (BFXIL [bfc] y x)
(ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) && lc < rc && ac == ^((1<<uint(64-rc)-1))
-> (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x)
// do combined loads // do combined loads
// little endian loads // little endian loads
......
...@@ -701,8 +701,6 @@ ...@@ -701,8 +701,6 @@
// may need to be reworked when NIHH/OIHH are added // may need to be reworked when NIHH/OIHH are added
(SRDconst [1] (SLDconst [1] (LGDR <t> x))) -> (LGDR <t> (LPDFR <x.Type> x)) (SRDconst [1] (SLDconst [1] (LGDR <t> x))) -> (LGDR <t> (LPDFR <x.Type> x))
(LDGR <t> (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR <t> x)) (LDGR <t> (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR <t> x))
(AND (MOVDconst [^(-1<<63)]) (LGDR <t> x)) -> (LGDR <t> (LPDFR <x.Type> x))
(LDGR <t> (AND (MOVDconst [^(-1<<63)]) x)) -> (LPDFR (LDGR <t> x))
(OR (MOVDconst [-1<<63]) (LGDR <t> x)) -> (LGDR <t> (LNDFR <x.Type> x)) (OR (MOVDconst [-1<<63]) (LGDR <t> x)) -> (LGDR <t> (LNDFR <x.Type> x))
(LDGR <t> (OR (MOVDconst [-1<<63]) x)) -> (LNDFR (LDGR <t> x)) (LDGR <t> (OR (MOVDconst [-1<<63]) x)) -> (LNDFR (LDGR <t> x))
...@@ -712,8 +710,6 @@ ...@@ -712,8 +710,6 @@
// detect copysign // detect copysign
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x)) (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x)) (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
(CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y) (CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y)
(CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y) (CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y)
......
...@@ -542,6 +542,14 @@ ...@@ -542,6 +542,14 @@
(Slicemask (Const64 [x])) && x > 0 -> (Const64 [-1]) (Slicemask (Const64 [x])) && x > 0 -> (Const64 [-1])
(Slicemask (Const64 [0])) -> (Const64 [0]) (Slicemask (Const64 [0])) -> (Const64 [0])
// Rewrite AND of consts as shifts if possible, slightly faster for 64 bit operands
// leading zeros can be shifted left, then right
(And64 <t> (Const64 [y]) x) && nlz(y) + nto(y) == 64 && nto(y) >= 32
-> (Rsh64Ux64 (Lsh64x64 <t> x (Const64 <t> [nlz(y)])) (Const64 <t> [nlz(y)]))
// trailing zeros can be shifted right, then left
(And64 <t> (Const64 [y]) x) && nlo(y) + ntz(y) == 64 && ntz(y) >= 32
-> (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
// simplifications often used for lengths. e.g. len(s[i:i+5])==5 // simplifications often used for lengths. e.g. len(s[i:i+5])==5
(Sub(64|32|16|8) (Add(64|32|16|8) x y) x) -> y (Sub(64|32|16|8) (Add(64|32|16|8) x y) x) -> y
(Sub(64|32|16|8) (Add(64|32|16|8) x y) y) -> x (Sub(64|32|16|8) (Add(64|32|16|8) x y) y) -> x
......
...@@ -28467,33 +28467,6 @@ func rewriteValueARM64_OpARM64ORshiftRL_0(v *Value) bool { ...@@ -28467,33 +28467,6 @@ func rewriteValueARM64_OpARM64ORshiftRL_0(v *Value) bool {
v.AddArg(y) v.AddArg(y)
return true return true
} }
// match: (ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x))
// cond: lc < rc && ac == ^((1<<uint(64-rc)-1))
// result: (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x)
for {
rc := v.AuxInt
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64ANDconst {
break
}
ac := v_0.AuxInt
y := v_0.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64SLLconst {
break
}
lc := v_1.AuxInt
x := v_1.Args[0]
if !(lc < rc && ac == ^(1<<uint(64-rc)-1)) {
break
}
v.reset(OpARM64BFXIL)
v.AuxInt = armBFAuxInt(rc-lc, 64-rc)
v.AddArg(y)
v.AddArg(x)
return true
}
return false return false
} }
func rewriteValueARM64_OpARM64RORWconst_0(v *Value) bool { func rewriteValueARM64_OpARM64RORWconst_0(v *Value) bool {
......
...@@ -5735,6 +5735,112 @@ func rewriteValuegeneric_OpAnd64_10(v *Value) bool { ...@@ -5735,6 +5735,112 @@ func rewriteValuegeneric_OpAnd64_10(v *Value) bool {
v.AddArg(y) v.AddArg(y)
return true return true
} }
// match: (And64 <t> (Const64 [y]) x)
// cond: nlz(y) + nto(y) == 64 && nto(y) >= 32
// result: (Rsh64Ux64 (Lsh64x64 <t> x (Const64 <t> [nlz(y)])) (Const64 <t> [nlz(y)]))
for {
t := v.Type
x := v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpConst64 {
break
}
y := v_0.AuxInt
if !(nlz(y)+nto(y) == 64 && nto(y) >= 32) {
break
}
v.reset(OpRsh64Ux64)
v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpConst64, t)
v1.AuxInt = nlz(y)
v0.AddArg(v1)
v.AddArg(v0)
v2 := b.NewValue0(v.Pos, OpConst64, t)
v2.AuxInt = nlz(y)
v.AddArg(v2)
return true
}
// match: (And64 <t> x (Const64 [y]))
// cond: nlz(y) + nto(y) == 64 && nto(y) >= 32
// result: (Rsh64Ux64 (Lsh64x64 <t> x (Const64 <t> [nlz(y)])) (Const64 <t> [nlz(y)]))
for {
t := v.Type
_ = v.Args[1]
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpConst64 {
break
}
y := v_1.AuxInt
if !(nlz(y)+nto(y) == 64 && nto(y) >= 32) {
break
}
v.reset(OpRsh64Ux64)
v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpConst64, t)
v1.AuxInt = nlz(y)
v0.AddArg(v1)
v.AddArg(v0)
v2 := b.NewValue0(v.Pos, OpConst64, t)
v2.AuxInt = nlz(y)
v.AddArg(v2)
return true
}
// match: (And64 <t> (Const64 [y]) x)
// cond: nlo(y) + ntz(y) == 64 && ntz(y) >= 32
// result: (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
for {
t := v.Type
x := v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpConst64 {
break
}
y := v_0.AuxInt
if !(nlo(y)+ntz(y) == 64 && ntz(y) >= 32) {
break
}
v.reset(OpLsh64x64)
v0 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpConst64, t)
v1.AuxInt = ntz(y)
v0.AddArg(v1)
v.AddArg(v0)
v2 := b.NewValue0(v.Pos, OpConst64, t)
v2.AuxInt = ntz(y)
v.AddArg(v2)
return true
}
// match: (And64 <t> x (Const64 [y]))
// cond: nlo(y) + ntz(y) == 64 && ntz(y) >= 32
// result: (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
for {
t := v.Type
_ = v.Args[1]
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpConst64 {
break
}
y := v_1.AuxInt
if !(nlo(y)+ntz(y) == 64 && ntz(y) >= 32) {
break
}
v.reset(OpLsh64x64)
v0 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
v0.AddArg(x)
v1 := b.NewValue0(v.Pos, OpConst64, t)
v1.AuxInt = ntz(y)
v0.AddArg(v1)
v.AddArg(v0)
v2 := b.NewValue0(v.Pos, OpConst64, t)
v2.AuxInt = ntz(y)
v.AddArg(v2)
return true
}
// match: (And64 (And64 i:(Const64 <t>) z) x) // match: (And64 (And64 i:(Const64 <t>) z) x)
// cond: (z.Op != OpConst64 && x.Op != OpConst64) // cond: (z.Op != OpConst64 && x.Op != OpConst64)
// result: (And64 i (And64 <t> z x)) // result: (And64 i (And64 <t> z x))
...@@ -5761,6 +5867,10 @@ func rewriteValuegeneric_OpAnd64_10(v *Value) bool { ...@@ -5761,6 +5867,10 @@ func rewriteValuegeneric_OpAnd64_10(v *Value) bool {
v.AddArg(v0) v.AddArg(v0)
return true return true
} }
return false
}
func rewriteValuegeneric_OpAnd64_20(v *Value) bool {
b := v.Block
// match: (And64 (And64 z i:(Const64 <t>)) x) // match: (And64 (And64 z i:(Const64 <t>)) x)
// cond: (z.Op != OpConst64 && x.Op != OpConst64) // cond: (z.Op != OpConst64 && x.Op != OpConst64)
// result: (And64 i (And64 <t> z x)) // result: (And64 i (And64 <t> z x))
...@@ -5874,10 +5984,6 @@ func rewriteValuegeneric_OpAnd64_10(v *Value) bool { ...@@ -5874,10 +5984,6 @@ func rewriteValuegeneric_OpAnd64_10(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
return false
}
func rewriteValuegeneric_OpAnd64_20(v *Value) bool {
b := v.Block
// match: (And64 (Const64 <t> [c]) (And64 x (Const64 <t> [d]))) // match: (And64 (Const64 <t> [c]) (And64 x (Const64 <t> [d])))
// cond: // cond:
// result: (And64 (Const64 <t> [c&d]) x) // result: (And64 (Const64 <t> [c&d]) x)
...@@ -81,7 +81,7 @@ func abs32(x float32) float32 { ...@@ -81,7 +81,7 @@ func abs32(x float32) float32 {
// Check that it's using integer registers // Check that it's using integer registers
func copysign(a, b, c float64) { func copysign(a, b, c float64) {
// amd64:"BTRQ\t[$]63","ANDQ","ORQ" // amd64:"BTRQ\t[$]63","SHRQ\t[$]63","SHLQ\t[$]63","ORQ"
// s390x:"CPSDR",-"MOVD" (no integer load/store) // s390x:"CPSDR",-"MOVD" (no integer load/store)
// ppc64:"FCPSGN" // ppc64:"FCPSGN"
// ppc64le:"FCPSGN" // ppc64le:"FCPSGN"
...@@ -100,7 +100,7 @@ func copysign(a, b, c float64) { ...@@ -100,7 +100,7 @@ func copysign(a, b, c float64) {
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store) // s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63) sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63)
// amd64:"ANDQ","ORQ" // amd64:-"SHLQ\t[$]1",-"SHRQ\t[$]1","SHRQ\t[$]63","SHLQ\t[$]63","ORQ"
// s390x:"CPSDR\t",-"MOVD\t" (no integer load/store) // s390x:"CPSDR\t",-"MOVD\t" (no integer load/store)
// ppc64:"FCPSGN" // ppc64:"FCPSGN"
// ppc64le:"FCPSGN" // ppc64le:"FCPSGN"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment