Commit 04945edd authored by Alexandru Moșoi, committed by Alexandru Moșoi

cmd/compile: replaces ANDQ with MOV?ZX

Where possible, replace ANDQ with MOV?ZX.
Take care not to regress bounds checking,
for example in [1000]int{}[i&255].

According to the "Intel 64 and IA-32 Architectures Optimization Reference
Manual", Section 3.5.1.13, "Zero-Latency MOV Instructions",
MOV?ZX instructions have zero latency on newer processors.

Updates #15105

Change-Id: I63539fdbc5812d5563aa1ebc49eca035bd307997
Reviewed-on: https://go-review.googlesource.com/21508
Reviewed-by: Айнар Гарипов <gugl.zadolbal@gmail.com>
Reviewed-by: David Chase <drchase@google.com>
parent cdc0ebbe
...@@ -587,6 +587,11 @@ ...@@ -587,6 +587,11 @@
(CMPB x (MOVBconst [c])) -> (CMPBconst x [c]) (CMPB x (MOVBconst [c])) -> (CMPBconst x [c])
(CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c])) (CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c]))
// Using MOVBQZX/MOVWQZX/MOVLQZX instead of ANDQ is cheaper.
(ANDQconst [0xFF] x) -> (MOVBQZX x)
(ANDQconst [0xFFFF] x) -> (MOVWQZX x)
(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
// strength reduction // strength reduction
// Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf: // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
// 1 - addq, shlq, leaq, negq // 1 - addq, shlq, leaq, negq
...@@ -1093,6 +1098,9 @@ ...@@ -1093,6 +1098,9 @@
(CMPBconst (MOVBconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT) (CMPBconst (MOVBconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT)
// Other known comparisons. // Other known comparisons.
(CMPQconst (MOVBQZX _) [c]) && 0xFF < c -> (FlagLT_ULT)
(CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c -> (FlagLT_ULT)
(CMPQconst (MOVLQZX _) [c]) && 0xFFFFFFFF < c -> (FlagLT_ULT)
(CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT) (CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
(CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT) (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT)
(CMPWconst (ANDWconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT) (CMPWconst (ANDWconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT)
......
...@@ -1838,6 +1838,42 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value, config *Config) bool { ...@@ -1838,6 +1838,42 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value, config *Config) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (ANDQconst [0xFF] x)
// cond:
// result: (MOVBQZX x)
for {
if v.AuxInt != 0xFF {
break
}
x := v.Args[0]
v.reset(OpAMD64MOVBQZX)
v.AddArg(x)
return true
}
// match: (ANDQconst [0xFFFF] x)
// cond:
// result: (MOVWQZX x)
for {
if v.AuxInt != 0xFFFF {
break
}
x := v.Args[0]
v.reset(OpAMD64MOVWQZX)
v.AddArg(x)
return true
}
// match: (ANDQconst [0xFFFFFFFF] x)
// cond:
// result: (MOVLQZX x)
for {
if v.AuxInt != 0xFFFFFFFF {
break
}
x := v.Args[0]
v.reset(OpAMD64MOVLQZX)
v.AddArg(x)
return true
}
// match: (ANDQconst [0] _) // match: (ANDQconst [0] _)
// cond: // cond:
// result: (MOVQconst [0]) // result: (MOVQconst [0])
...@@ -3026,6 +3062,51 @@ func rewriteValueAMD64_OpAMD64CMPQconst(v *Value, config *Config) bool { ...@@ -3026,6 +3062,51 @@ func rewriteValueAMD64_OpAMD64CMPQconst(v *Value, config *Config) bool {
v.reset(OpAMD64FlagGT_UGT) v.reset(OpAMD64FlagGT_UGT)
return true return true
} }
// match: (CMPQconst (MOVBQZX _) [c])
// cond: 0xFF < c
// result: (FlagLT_ULT)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64MOVBQZX {
break
}
c := v.AuxInt
if !(0xFF < c) {
break
}
v.reset(OpAMD64FlagLT_ULT)
return true
}
// match: (CMPQconst (MOVWQZX _) [c])
// cond: 0xFFFF < c
// result: (FlagLT_ULT)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64MOVWQZX {
break
}
c := v.AuxInt
if !(0xFFFF < c) {
break
}
v.reset(OpAMD64FlagLT_ULT)
return true
}
// match: (CMPQconst (MOVLQZX _) [c])
// cond: 0xFFFFFFFF < c
// result: (FlagLT_ULT)
for {
v_0 := v.Args[0]
if v_0.Op != OpAMD64MOVLQZX {
break
}
c := v.AuxInt
if !(0xFFFFFFFF < c) {
break
}
v.reset(OpAMD64FlagLT_ULT)
return true
}
// match: (CMPQconst (ANDQconst _ [m]) [n]) // match: (CMPQconst (ANDQconst _ [m]) [n])
// cond: 0 <= m && m < n // cond: 0 <= m && m < n
// result: (FlagLT_ULT) // result: (FlagLT_ULT)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment