Commit a98bb7e2 authored by Josh Bleecher Snyder's avatar Josh Bleecher Snyder

cmd/compile: only optimize chained Moves on disjoint stack mem

This optimization is not sound if A, B, or C
might overlap with each other.

Thanks to Michael Munday for pointing this
out during the review of CL 143479.

This reduces the number of times this optimization
triggers during make.bash from 386 to 74.

This is unfortunate, but I don't see an obvious way around it,
short of souping up the disjointness analysis.

name        old object-bytes  new object-bytes  delta
Template          507kB ± 0%        507kB ± 0%   +0.13%  (p=0.008 n=5+5)
Unicode           225kB ± 0%        225kB ± 0%     ~     (all equal)
GoTypes          1.85MB ± 0%       1.85MB ± 0%   +0.02%  (p=0.008 n=5+5)
Flate             328kB ± 0%        328kB ± 0%     ~     (all equal)
GoParser          402kB ± 0%        402kB ± 0%     ~     (all equal)
Reflect          1.41MB ± 0%       1.41MB ± 0%     ~     (all equal)
Tar               457kB ± 0%        458kB ± 0%   +0.20%  (p=0.008 n=5+5)
XML               600kB ± 0%        601kB ± 0%   +0.03%  (p=0.008 n=5+5)

Change-Id: Ida408cb627145ba9faf473a78606f050c2f3f51c
Reviewed-on: https://go-review.googlesource.com/c/145208
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarMichael Munday <mike.munday@ibm.com>
parent 968742a8
...@@ -1804,11 +1804,16 @@ ...@@ -1804,11 +1804,16 @@
// Later passes (deadstore, elim unread auto) will remove the A -> B move, if possible. // Later passes (deadstore, elim unread auto) will remove the A -> B move, if possible.
// This happens most commonly when B is an autotmp inserted earlier // This happens most commonly when B is an autotmp inserted earlier
// during compilation to ensure correctness. // during compilation to ensure correctness.
(Move {t1} [s1] dst tmp1 midmem:(Move {t2} [s2] tmp2 src _)) // Take care that overlapping moves are preserved.
&& s1 == s2 // Restrict this optimization to the stack, to avoid duplicating loads from the heap;
// see CL 145208 for discussion.
(Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _))
&& t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq
&& isSamePtr(tmp1, tmp2) && isSamePtr(tmp1, tmp2)
-> (Move {t1} [s1] dst src midmem) && isStackPtr(src)
&& disjoint(src, s, tmp2, s)
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
-> (Move {t1} [s] dst src midmem)
// Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go). // Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go).
// However, this rule is needed to prevent the previous rule from looping forever in such cases. // However, this rule is needed to prevent the previous rule from looping forever in such cases.
......
...@@ -530,6 +530,13 @@ func isSamePtr(p1, p2 *Value) bool { ...@@ -530,6 +530,13 @@ func isSamePtr(p1, p2 *Value) bool {
return false return false
} }
func isStackPtr(v *Value) bool {
for v.Op == OpOffPtr || v.Op == OpAddPtr {
v = v.Args[0]
}
return v.Op == OpSP || v.Op == OpLocalAddr
}
// disjoint reports whether the memory region specified by [p1:p1+n1) // disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2). // does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap. // A return value of false does not imply the regions overlap.
......
...@@ -17234,6 +17234,8 @@ func rewriteValuegeneric_OpMove_10(v *Value) bool { ...@@ -17234,6 +17234,8 @@ func rewriteValuegeneric_OpMove_10(v *Value) bool {
func rewriteValuegeneric_OpMove_20(v *Value) bool { func rewriteValuegeneric_OpMove_20(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
config := b.Func.Config
_ = config
// match: (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} (OffPtr <tt2> [o2] p2) d1 (Store {t3} (OffPtr <tt3> [o3] p3) d2 (Store {t4} (OffPtr <tt4> [o4] p4) d3 (Store {t5} (OffPtr <tt5> [o5] p5) d4 (Zero {t6} [n] p6 _))))))) // match: (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} (OffPtr <tt2> [o2] p2) d1 (Store {t3} (OffPtr <tt3> [o3] p3) d2 (Store {t4} (OffPtr <tt4> [o4] p4) d3 (Store {t5} (OffPtr <tt5> [o5] p5) d4 (Zero {t6} [n] p6 _)))))))
// cond: isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6) && alignof(t2) <= alignof(t1) && alignof(t3) <= alignof(t1) && alignof(t4) <= alignof(t1) && alignof(t5) <= alignof(t1) && alignof(t6) <= alignof(t1) && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && registerizable(b, t5) && n >= o2 + sizeof(t2) && n >= o3 + sizeof(t3) && n >= o4 + sizeof(t4) && n >= o5 + sizeof(t5) // cond: isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6) && alignof(t2) <= alignof(t1) && alignof(t3) <= alignof(t1) && alignof(t4) <= alignof(t1) && alignof(t5) <= alignof(t1) && alignof(t6) <= alignof(t1) && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && registerizable(b, t5) && n >= o2 + sizeof(t2) && n >= o3 + sizeof(t3) && n >= o4 + sizeof(t4) && n >= o5 + sizeof(t5)
// result: (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [o4] dst) d3 (Store {t5} (OffPtr <tt5> [o5] dst) d4 (Zero {t1} [n] dst mem))))) // result: (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [o4] dst) d3 (Store {t5} (OffPtr <tt5> [o5] dst) d4 (Zero {t1} [n] dst mem)))))
...@@ -17355,11 +17357,11 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool { ...@@ -17355,11 +17357,11 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool {
v.AddArg(v1) v.AddArg(v1)
return true return true
} }
// match: (Move {t1} [s1] dst tmp1 midmem:(Move {t2} [s2] tmp2 src _)) // match: (Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _))
// cond: s1 == s2 && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) // cond: t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
// result: (Move {t1} [s1] dst src midmem) // result: (Move {t1} [s] dst src midmem)
for { for {
s1 := v.AuxInt s := v.AuxInt
t1 := v.Aux t1 := v.Aux
_ = v.Args[2] _ = v.Args[2]
dst := v.Args[0] dst := v.Args[0]
...@@ -17368,16 +17370,18 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool { ...@@ -17368,16 +17370,18 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool {
if midmem.Op != OpMove { if midmem.Op != OpMove {
break break
} }
s2 := midmem.AuxInt if midmem.AuxInt != s {
break
}
t2 := midmem.Aux t2 := midmem.Aux
_ = midmem.Args[2] _ = midmem.Args[2]
tmp2 := midmem.Args[0] tmp2 := midmem.Args[0]
src := midmem.Args[1] src := midmem.Args[1]
if !(s1 == s2 && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2)) { if !(t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
break break
} }
v.reset(OpMove) v.reset(OpMove)
v.AuxInt = s1 v.AuxInt = s
v.Aux = t1 v.Aux = t1
v.AddArg(dst) v.AddArg(dst)
v.AddArg(src) v.AddArg(src)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment