Commit 225ef76c authored by Cherry Zhang

[dev.ssa] cmd/compile: fix scheduling of tuple ops

We want tuple-reading ops to immediately follow the tuple-generating op, so
that tuple values will not be spilled or copied.

The mechanism introduced in the previous CL cannot really prevent tuples
from being interleaved. In this CL we always emit a tuple and its selectors
together. Maybe remove the tuple scores later if they do not help performance (TODO).

Also, prevent tighten from moving tuple-reading ops across blocks.

The previous CL added a special case for regenerating flags through a
tuple-reading pseudo-op, but it did not cover the end-of-block case. This CL
fixes that and generalizes the condition.

Progress on SSA backend for ARM. Still not complete.

Updates #15365.

Change-Id: I8980b34e7a64eb98153540e9e19a3782e20406ff
Reviewed-on: https://go-review.googlesource.com/23792
Reviewed-by: David Chase <drchase@google.com>
parent 59e11d78
@@ -95,18 +95,9 @@ func flagalloc(f *Func) {
 				continue
 			}
 			// Recalculate a
-			var c1 *Value
-			if a.Op == OpARMCarry {
-				// Pseudo-op does not generate flags, its arg actually does
-				//TODO: generalize this condition?
-				c1 = a.Args[0].copyInto(b)
-			}
-			c := a.copyInto(b)
+			c := copyFlags(a, b)
 			// Update v.
 			v.SetArg(i, c)
-			if c1 != nil {
-				c.SetArg(0, c1)
-			}
 			// Remember the most-recently computed flag value.
 			flag = a
 		}
@@ -128,7 +119,7 @@ func flagalloc(f *Func) {
 		if v := end[b.ID]; v != nil && v != flag {
 			// Need to reissue flag generator for use by
 			// subsequent blocks.
-			_ = v.copyInto(b)
+			copyFlags(v, b)
 			// Note: this flag generator is not properly linked up
 			// with the flag users. This breaks the SSA representation.
 			// We could fix up the users with another pass, but for now
@@ -142,3 +133,19 @@ func flagalloc(f *Func) {
 		b.FlagsLiveAtEnd = end[b.ID] != nil
 	}
 }
+
+// copyFlags copies v (flag generator) into b, returns the copy.
+// If v's arg is also flags, copy recursively.
+func copyFlags(v *Value, b *Block) *Value {
+	flagsArgs := make(map[int]*Value)
+	for i, a := range v.Args {
+		if a.Type.IsFlags() || a.Type.IsTuple() {
+			flagsArgs[i] = copyFlags(a, b)
+		}
+	}
+	c := v.copyInto(b)
+	for i, a := range flagsArgs {
+		c.SetArg(i, a)
+	}
+	return c
+}
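As the commit message notes, a pseudo-op like OpARMCarry does not itself generate the flags; its argument does. copyFlags therefore copies flag- or tuple-typed arguments recursively and rewires the copy to use the copied arguments, which also covers the end-of-block regeneration case. Purely for illustration (not part of the CL), here is a self-contained toy model of that recursion: toyValue, copyIntoToy and the string op names are invented for the example, and the single isFlags field stands in for the real IsFlags/IsTuple type checks.

package main

import "fmt"

// toyValue is a stand-in for *ssa.Value: an op, whether its result lives in
// the flags register, and its arguments.
type toyValue struct {
	op      string
	isFlags bool
	args    []*toyValue
}

// copyIntoToy models Value.copyInto: clone one value into the target block.
func copyIntoToy(v *toyValue, block *[]*toyValue) *toyValue {
	c := &toyValue{op: v.op, isFlags: v.isFlags, args: append([]*toyValue(nil), v.args...)}
	*block = append(*block, c)
	return c
}

// copyFlagsToy mirrors the CL's copyFlags: flag-typed arguments are copied
// recursively, and the copy is rewired to use the copied arguments.
func copyFlagsToy(v *toyValue, block *[]*toyValue) *toyValue {
	flagsArgs := make(map[int]*toyValue)
	for i, a := range v.args {
		if a.isFlags {
			flagsArgs[i] = copyFlagsToy(a, block)
		}
	}
	c := copyIntoToy(v, block)
	for i, a := range flagsArgs {
		c.args[i] = a
	}
	return c
}

func main() {
	// "adds" really sets the flags; "carry" is a pseudo-op that only reads them.
	adds := &toyValue{op: "ADDSconst", isFlags: true}
	carry := &toyValue{op: "carry", isFlags: true, args: []*toyValue{adds}}

	var block []*toyValue
	copyFlagsToy(carry, &block)
	for _, v := range block {
		fmt.Println(v.op) // prints ADDSconst, then carry
	}
}

In this sketch the flag-producing argument is materialized in the target block first, and the copied pseudo-op points at that copy rather than at the original.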
@@ -211,6 +211,7 @@ func schedule(f *Func) {
 		// Schedule highest priority value, update use counts, repeat.
 		order = order[:0]
+		tuples := make(map[ID][]*Value)
 		for {
 			// Find highest priority schedulable value.
 			// Note that schedule is assembled backwards.
@@ -222,7 +223,31 @@ func schedule(f *Func) {
 			v := heap.Pop(priq).(*Value)
 			// Add it to the schedule.
-			order = append(order, v)
+			// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
+			//TODO: maybe remove ReadTuple score above, if it does not help on performance
+			switch {
+			case v.Op == OpARMCarry || v.Op == OpARMLoweredSelect0:
+				if tuples[v.Args[0].ID] == nil {
+					tuples[v.Args[0].ID] = make([]*Value, 2)
+				}
+				tuples[v.Args[0].ID][0] = v
+			case v.Op == OpARMLoweredSelect1:
+				if tuples[v.Args[0].ID] == nil {
+					tuples[v.Args[0].ID] = make([]*Value, 2)
+				}
+				tuples[v.Args[0].ID][1] = v
+			case v.Type.IsTuple() && tuples[v.ID] != nil:
+				if tuples[v.ID][1] != nil {
+					order = append(order, tuples[v.ID][1])
+				}
+				if tuples[v.ID][0] != nil {
+					order = append(order, tuples[v.ID][0])
+				}
+				delete(tuples, v.ID)
+				fallthrough
+			default:
+				order = append(order, v)
+			}
 			// Update use counts of arguments.
 			for _, w := range v.Args {
...
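Because the schedule is assembled backwards, the selectors are popped before the tuple-generating op; the switch above parks them in the tuples map and, once the tuple op is reached, emits Select1, Select0 and then the tuple op, so after the final reversal the selectors sit immediately after the tuple op. Purely for illustration (not part of the CL), a toy model of this parking trick: the op names and the fixed pop order are invented, and a plain slice stands in for the priority queue.

package main

import "fmt"

// toyVal is a stand-in for *ssa.Value in this sketch.
type toyVal struct {
	id      int
	op      string
	isTuple bool
	arg     int // id of the tuple this selector reads, if any
}

func main() {
	// Backwards pop order: uses come out before their def.
	pops := []toyVal{
		{id: 3, op: "Select1", arg: 1},
		{id: 4, op: "OtherOp"},
		{id: 2, op: "Select0", arg: 1},
		{id: 1, op: "AddWithFlags", isTuple: true},
	}

	var order []toyVal
	parked := make(map[int][]toyVal) // tuple ID -> parked selectors
	for _, v := range pops {
		switch {
		case v.op == "Select0" || v.op == "Select1":
			// Hold the selector until its tuple op is scheduled.
			parked[v.arg] = append(parked[v.arg], v)
		case v.isTuple:
			// Emit the parked selectors, then the tuple op itself.
			order = append(order, parked[v.id]...)
			delete(parked, v.id)
			order = append(order, v)
		default:
			order = append(order, v)
		}
	}

	// The schedule was assembled backwards; reverse it for the final order.
	for i, j := 0, len(order)-1; i < j; i, j = i+1, j-1 {
		order[i], order[j] = order[j], order[i]
	}
	for _, v := range order {
		fmt.Println(v.op) // AddWithFlags, Select0, Select1, OtherOp
	}
}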
@@ -54,13 +54,16 @@ func tighten(f *Func) {
 	for _, b := range f.Blocks {
 		for i := 0; i < len(b.Values); i++ {
 			v := b.Values[i]
-			if v.Op == OpPhi || v.Op == OpGetClosurePtr || v.Op == OpConvert || v.Op == OpArg {
+			switch v.Op {
+			case OpPhi, OpGetClosurePtr, OpConvert, OpArg, OpSelect0, OpSelect1:
 				// GetClosurePtr & Arg must stay in entry block.
 				// OpConvert must not float over call sites.
+				// Select{0,1} reads a tuple, it must stay with the tuple-generating op.
 				// TODO do we instead need a dependence edge of some sort for OpConvert?
 				// Would memory do the trick, or do we need something else that relates
 				// to safe point operations?
 				continue
+			default:
 			}
 			if len(v.Args) > 0 && v.Args[len(v.Args)-1].Type.IsMemory() {
 				// We can't move values which have a memory arg - it might
...
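The tighten change simply adds OpSelect0 and OpSelect1 to the set of pinned ops, so a later pass cannot move a selector into another block and undo the adjacency the scheduler established. Purely for illustration (not part of the CL), the shape of that guard with invented op names:

package main

import "fmt"

func main() {
	ops := []string{"Phi", "Select0", "Add", "Select1", "Mul"}
	var movable []string
	for _, op := range ops {
		switch op {
		case "Phi", "Select0", "Select1":
			// Pinned: must not be moved across blocks.
			continue
		default:
		}
		movable = append(movable, op)
	}
	fmt.Println(movable) // [Add Mul]
}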