Commit 03c0f3fe authored by Josh Bleecher Snyder, committed by Russ Cox

cmd/gc: alias more variables during register allocation

This is joint work with Daniel Morsing.

In order for the register allocator to alias two variables, they must have the same width, stack offset, and etype. Code generation was altering a variable's etype in a few places. This prevented the variable from being moved to a register, which in turn prevented peephole optimization. This failure to alias was very common, with almost 23,000 instances just running make.bash.

This phenomenon was not visible in the register allocation debug output because the variables that failed to alias had the same name. The debugging-only change to bits.c fixes this by printing the variable number with its name.

This CL fixes the source of all etype mismatches for 6g, all but one case for 8g, and depressingly few cases for 5g. (I believe that extending CL 6819083 to 5g is a prerequisite.) Fixing the remaining cases in 8g and 5g is work for the future.

The etype mismatch fixes are:

* [gc] Slicing changed the type of the base pointer into a uintptr in order to perform arithmetic on it. Instead, support addition directly on pointers.

* [*g] OSPTR was giving type uintptr to slice base pointers; undo that. This arose, for example, while compiling copy(dst, src).

* [8g] 64 bit float conversion was assigning int64 type during codegen, overwriting the existing uint64 type.

Note that some etype mismatches are appropriate, such as a struct with a single field or an array with a single element.

With these fixes, the number of registerizations that occur while running make.bash for 6g increases ~10%. Hello world binary size shrinks ~1.5%. Running all benchmarks in the standard library shows performance improvements ranging from nominal to substantive (>10%); a full comparison using 6g on my laptop is available at https://gist.github.com/josharian/8f9b5beb46667c272064. The microbenchmarks must be taken with a grain of salt; see issue 7920. The few benchmarks that show real regressions are likely due to issue 7920. I manually examined the generated code for the top few regressions and none had any assembly output changes. The few benchmarks that show extraordinary improvements are likely also due to issue 7920.

Performance results from 8g appear similar to 6g.

5g shows no performance improvements. This is not surprising, given the discussion above.

Update #7316

LGTM=rsc
R=rsc, daniel.morsing, bradfitz
CC=dave, golang-codereviews
https://golang.org/cl/91850043
parent 2497c430
...@@ -254,6 +254,7 @@ cgen(Node *n, Node *res) ...@@ -254,6 +254,7 @@ cgen(Node *n, Node *res)
case OOR: case OOR:
case OXOR: case OXOR:
case OADD: case OADD:
case OADDPTR:
case OMUL: case OMUL:
a = optoas(n->op, nl->type); a = optoas(n->op, nl->type);
goto sbop; goto sbop;
......
...@@ -1366,7 +1366,7 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -1366,7 +1366,7 @@ naddr(Node *n, Addr *a, int canemitcode)
naddr(n->left, a, canemitcode); naddr(n->left, a, canemitcode);
if(a->type == D_CONST && a->offset == 0) if(a->type == D_CONST && a->offset == 0)
break; // ptr(nil) break; // ptr(nil)
a->etype = simtype[TUINTPTR]; a->etype = simtype[tptr];
a->offset += Array_array; a->offset += Array_array;
a->width = widthptr; a->width = widthptr;
break; break;
...@@ -1592,6 +1592,7 @@ optoas(int op, Type *t) ...@@ -1592,6 +1592,7 @@ optoas(int op, Type *t)
case CASE(OADD, TINT32): case CASE(OADD, TINT32):
case CASE(OADD, TUINT32): case CASE(OADD, TUINT32):
case CASE(OADD, TPTR32): case CASE(OADD, TPTR32):
case CASE(OADDPTR, TPTR32):
a = AADD; a = AADD;
break; break;
......
...@@ -247,6 +247,7 @@ cgen(Node *n, Node *res) ...@@ -247,6 +247,7 @@ cgen(Node *n, Node *res)
case OOR: case OOR:
case OXOR: case OXOR:
case OADD: case OADD:
case OADDPTR:
case OMUL: case OMUL:
a = optoas(n->op, nl->type); a = optoas(n->op, nl->type);
if(a == AIMULB) { if(a == AIMULB) {
......
...@@ -1300,7 +1300,7 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -1300,7 +1300,7 @@ naddr(Node *n, Addr *a, int canemitcode)
naddr(n->left, a, canemitcode); naddr(n->left, a, canemitcode);
if(a->type == D_CONST && a->offset == 0) if(a->type == D_CONST && a->offset == 0)
break; // ptr(nil) break; // ptr(nil)
a->etype = simtype[TUINTPTR]; a->etype = simtype[tptr];
a->offset += Array_array; a->offset += Array_array;
a->width = widthptr; a->width = widthptr;
break; break;
...@@ -1533,12 +1533,14 @@ optoas(int op, Type *t) ...@@ -1533,12 +1533,14 @@ optoas(int op, Type *t)
case CASE(OADD, TINT32): case CASE(OADD, TINT32):
case CASE(OADD, TUINT32): case CASE(OADD, TUINT32):
case CASE(OADD, TPTR32): case CASE(OADD, TPTR32):
case CASE(OADDPTR, TPTR32):
a = AADDL; a = AADDL;
break; break;
case CASE(OADD, TINT64): case CASE(OADD, TINT64):
case CASE(OADD, TUINT64): case CASE(OADD, TUINT64):
case CASE(OADD, TPTR64): case CASE(OADD, TPTR64):
case CASE(OADDPTR, TPTR64):
a = AADDQ; a = AADDQ;
break; break;
......
...@@ -242,6 +242,7 @@ cgen(Node *n, Node *res) ...@@ -242,6 +242,7 @@ cgen(Node *n, Node *res)
case OOR: case OOR:
case OXOR: case OXOR:
case OADD: case OADD:
case OADDPTR:
case OMUL: case OMUL:
a = optoas(n->op, nl->type); a = optoas(n->op, nl->type);
if(a == AIMULB) { if(a == AIMULB) {
......
...@@ -432,6 +432,7 @@ optoas(int op, Type *t) ...@@ -432,6 +432,7 @@ optoas(int op, Type *t)
case CASE(OADD, TINT32): case CASE(OADD, TINT32):
case CASE(OADD, TUINT32): case CASE(OADD, TUINT32):
case CASE(OADD, TPTR32): case CASE(OADD, TPTR32):
case CASE(OADDPTR, TPTR32):
a = AADDL; a = AADDL;
break; break;
...@@ -1687,7 +1688,6 @@ floatmove(Node *f, Node *t) ...@@ -1687,7 +1688,6 @@ floatmove(Node *f, Node *t)
gins(ACMPL, &thi, ncon(0)); gins(ACMPL, &thi, ncon(0));
p1 = gbranch(AJLT, T, 0); p1 = gbranch(AJLT, T, 0);
// native // native
t1.type = types[TINT64];
nodreg(&r1, types[tt], D_F0); nodreg(&r1, types[tt], D_F0);
gins(AFMOVV, &t1, &r1); gins(AFMOVV, &t1, &r1);
if(tt == TFLOAT32) if(tt == TFLOAT32)
...@@ -2327,7 +2327,7 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -2327,7 +2327,7 @@ naddr(Node *n, Addr *a, int canemitcode)
naddr(n->left, a, canemitcode); naddr(n->left, a, canemitcode);
if(a->type == D_CONST && a->offset == 0) if(a->type == D_CONST && a->offset == 0)
break; // ptr(nil) break; // ptr(nil)
a->etype = simtype[TUINTPTR]; a->etype = simtype[tptr];
a->offset += Array_array; a->offset += Array_array;
a->width = widthptr; a->width = widthptr;
break; break;
......
...@@ -153,7 +153,7 @@ Qconv(Fmt *fp) ...@@ -153,7 +153,7 @@ Qconv(Fmt *fp)
if(var[i].node == N || var[i].node->sym == S) if(var[i].node == N || var[i].node->sym == S)
fmtprint(fp, "$%d", i); fmtprint(fp, "$%d", i);
else { else {
fmtprint(fp, "%s", var[i].node->sym->name); fmtprint(fp, "%s(%d)", var[i].node->sym->name, i);
if(var[i].offset != 0) if(var[i].offset != 0)
fmtprint(fp, "%+lld", (vlong)var[i].offset); fmtprint(fp, "%+lld", (vlong)var[i].offset);
} }
......
...@@ -829,7 +829,6 @@ cgen_slice(Node *n, Node *res) ...@@ -829,7 +829,6 @@ cgen_slice(Node *n, Node *res)
src = *n->left; src = *n->left;
if(n->op == OSLICE || n->op == OSLICE3 || n->op == OSLICESTR) if(n->op == OSLICE || n->op == OSLICE3 || n->op == OSLICESTR)
src.xoffset += Array_array; src.xoffset += Array_array;
src.type = types[TUINTPTR];
if(n->op == OSLICEARR || n->op == OSLICE3ARR) { if(n->op == OSLICEARR || n->op == OSLICE3ARR) {
if(!isptr[n->left->type->etype]) if(!isptr[n->left->type->etype])
...@@ -842,9 +841,11 @@ cgen_slice(Node *n, Node *res) ...@@ -842,9 +841,11 @@ cgen_slice(Node *n, Node *res)
cgen(add, base); cgen(add, base);
} }
} else if(offs == N) { } else if(offs == N) {
src.type = types[tptr];
cgen(&src, base); cgen(&src, base);
} else { } else {
add = nod(OADD, &src, offs); src.type = types[tptr];
add = nod(OADDPTR, &src, offs);
typecheck(&add, Erv); typecheck(&add, Erv);
cgen(add, base); cgen(add, base);
} }
...@@ -855,7 +856,7 @@ cgen_slice(Node *n, Node *res) ...@@ -855,7 +856,7 @@ cgen_slice(Node *n, Node *res)
// dst.array = src.array [ + lo *width ] // dst.array = src.array [ + lo *width ]
dst = *res; dst = *res;
dst.xoffset += Array_array; dst.xoffset += Array_array;
dst.type = types[TUINTPTR]; dst.type = types[tptr];
cgen(base, &dst); cgen(base, &dst);
......
...@@ -445,6 +445,7 @@ enum ...@@ -445,6 +445,7 @@ enum
OSUB, // x - y OSUB, // x - y
OOR, // x | y OOR, // x | y
OXOR, // x ^ y OXOR, // x ^ y
OADDPTR, // ptr + uintptr, inserted by compiler only, used to avoid unsafe type changes during codegen
OADDSTR, // s + "foo" OADDSTR, // s + "foo"
OADDR, // &x OADDR, // &x
OANDAND, // b0 && b1 OANDAND, // b0 && b1
......
...@@ -535,6 +535,19 @@ reswitch: ...@@ -535,6 +535,19 @@ reswitch:
op = n->etype; op = n->etype;
goto arith; goto arith;
case OADDPTR:
ok |= Erv;
l = typecheck(&n->left, Erv);
r = typecheck(&n->right, Erv);
if(l->type == T || r->type == T)
goto error;
if(l->type->etype != tptr)
fatal("bad OADDPTR left type %E for %N", l->type->etype, n->left);
if(r->type->etype != TUINTPTR)
fatal("bad OADDPTR right type %E for %N", r->type->etype, n->right);
n->type = types[tptr];
goto ret;
case OADD: case OADD:
case OAND: case OAND:
case OANDAND: case OANDAND:
......
// runoutput
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Issue 7316
// This test exercises all types of numeric conversions, which was one
// of the sources of etype mismatch during register allocation in 8g.
package main
import "fmt"
// tpl is the text of one generated init function. Its three %s verbs are
// filled, in order, with: the type of the declared variable, the type it is
// converted to, and the type it is converted back to.
const tpl = `
func init() {
var i %s
j := %s(i)
_ = %s(j)
}
`

// main writes a Go program to standard output. The program consists of a
// package clause, one init function per pair of numeric types (each type
// paired with itself and with every type listed after it), and an empty main.
func main() {
	// Order matters: it determines the order of the emitted init functions.
	numeric := []string{
		"byte", "rune", "uintptr",
		"float32", "float64",
		"int", "int8", "int16", "int32", "int64",
		"uint", "uint8", "uint16", "uint32", "uint64",
	}

	fmt.Println("package main")
	for lo := 0; lo < len(numeric); lo++ {
		src := numeric[lo]
		// Start at lo so each unordered pair is emitted once; the template
		// itself converts src -> dst and then back dst -> src.
		for hi := lo; hi < len(numeric); hi++ {
			fmt.Printf(tpl, src, numeric[hi], src)
		}
	}
	fmt.Println("func main() {}")
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment