Commit e419535f authored by Russ Cox's avatar Russ Cox

5g, 6g, 8g: registerize variables again

My previous CL:

changeset:   9645:ce2e5f44b310
user:        Russ Cox <rsc@golang.org>
date:        Tue Sep 06 10:24:21 2011 -0400
summary:     gc: unify stack frame layout

introduced a bug wherein no variables were
being registerized, making Go programs 2-3x
slower than they had been before.

This CL fixes that bug (along with some others
it was hiding) and adds a test that optimization
makes at least one test case faster.

R=ken2
CC=golang-dev
https://golang.org/cl/5174045
parent f75ff01f
......@@ -307,6 +307,7 @@ datastring(char *s, int len, Addr *a)
a->offset = widthptr+4; // skip header
a->reg = NREG;
a->sym = sym;
a->node = sym->def;
}
/*
......@@ -325,6 +326,7 @@ datagostring(Strlit *sval, Addr *a)
a->offset = 0; // header
a->reg = NREG;
a->sym = sym;
a->node = sym->def;
}
void
......
......@@ -512,6 +512,7 @@ nodarg(Type *t, int fp)
fatal("nodarg: offset not computed for %T", t);
n->xoffset = t->width;
n->addable = 1;
n->orig = t->nname;
fp:
switch(fp) {
......@@ -1263,6 +1264,7 @@ naddr(Node *n, Addr *a, int canemitcode)
a->sym = n->left->sym;
a->type = D_OREG;
a->name = D_PARAM;
a->node = n->left->orig;
break;
case ONAME:
......@@ -1275,6 +1277,9 @@ naddr(Node *n, Addr *a, int canemitcode)
}
a->offset = n->xoffset;
a->sym = n->sym;
a->node = n->orig;
//if(a->node >= (Node*)&n)
// fatal("stack node");
if(a->sym == S)
a->sym = lookup(".noname");
if(n->method) {
......@@ -1293,8 +1298,6 @@ naddr(Node *n, Addr *a, int canemitcode)
break;
case PAUTO:
a->name = D_AUTO;
if (n->sym)
a->node = n->orig;
break;
case PPARAM:
case PPARAMOUT:
......
......@@ -92,8 +92,8 @@ setoutvar(void)
ovar.b[z] |= bit.b[z];
t = structnext(&save);
}
//if(bany(b))
//print("ovars = %Q\n", &ovar);
//if(bany(ovar))
//print("ovar = %Q\n", ovar);
}
void
......@@ -911,10 +911,12 @@ mkvar(Reg *r, Adr *a)
}
node = a->node;
if(node == N || node->op != ONAME || node->orig != N)
if(node == N || node->op != ONAME || node->orig == N)
goto none;
node = node->orig;
if(node->sym->name[0] == '.')
if(node->orig != node)
fatal("%D: bad node", a);
if(node->sym == S || node->sym->name[0] == '.')
goto none;
et = a->etype;
o = a->offset;
......@@ -1571,7 +1573,7 @@ dumpone(Reg *r)
if(bany(&r->refahead))
print(" ra:%Q ", r->refahead);
if(bany(&r->calbehind))
print("cb:%Q ", r->calbehind);
print(" cb:%Q ", r->calbehind);
if(bany(&r->calahead))
print(" ca:%Q ", r->calahead);
if(bany(&r->regdiff))
......
......@@ -310,6 +310,7 @@ datastring(char *s, int len, Addr *a)
sym = stringsym(s, len);
a->type = D_EXTERN;
a->sym = sym;
a->node = sym->def;
a->offset = widthptr+4; // skip header
a->etype = TINT32;
}
......@@ -326,6 +327,7 @@ datagostring(Strlit *sval, Addr *a)
sym = stringsym(sval->s, sval->len);
a->type = D_EXTERN;
a->sym = sym;
a->node = sym->def;
a->offset = 0; // header
a->etype = TINT32;
}
......
......@@ -485,6 +485,7 @@ nodarg(Type *t, int fp)
fatal("nodarg: offset not computed for %T", t);
n->xoffset = t->width;
n->addable = 1;
n->orig = t->nname;
fp:
switch(fp) {
......@@ -1119,6 +1120,7 @@ naddr(Node *n, Addr *a, int canemitcode)
a->offset = n->xoffset;
a->sym = n->left->sym;
a->type = D_PARAM;
a->node = n->left->orig;
break;
case ONAME:
......@@ -1131,6 +1133,9 @@ naddr(Node *n, Addr *a, int canemitcode)
}
a->offset = n->xoffset;
a->sym = n->sym;
a->node = n->orig;
//if(a->node >= (Node*)&n)
// fatal("stack node");
if(a->sym == S)
a->sym = lookup(".noname");
if(n->method) {
......@@ -1148,8 +1153,6 @@ naddr(Node *n, Addr *a, int canemitcode)
break;
case PAUTO:
a->type = D_AUTO;
if (n->sym)
a->node = n->orig;
break;
case PPARAM:
case PPARAMOUT:
......
......@@ -89,8 +89,8 @@ setoutvar(void)
ovar.b[z] |= bit.b[z];
t = structnext(&save);
}
//if(bany(b))
//print("ovars = %Q\n", &ovar);
//if(bany(&ovar))
//print("ovars = %Q\n", ovar);
}
static void
......@@ -968,11 +968,14 @@ mkvar(Reg *r, Adr *a)
n = t;
break;
}
node = a->node;
if(node == N || node->op != ONAME || node->orig != N)
if(node == N || node->op != ONAME || node->orig == N)
goto none;
node = node->orig;
if(node->sym->name[0] == '.')
if(node->orig != node)
fatal("%D: bad node", a);
if(node->sym == S || node->sym->name[0] == '.')
goto none;
et = a->etype;
o = a->offset;
......@@ -1622,7 +1625,7 @@ dumpone(Reg *r)
if(bany(&r->refahead))
print(" ra:%Q ", r->refahead);
if(bany(&r->calbehind))
print("cb:%Q ", r->calbehind);
print(" cb:%Q ", r->calbehind);
if(bany(&r->calahead))
print(" ca:%Q ", r->calahead);
if(bany(&r->regdiff))
......
......@@ -308,6 +308,7 @@ datastring(char *s, int len, Addr *a)
sym = stringsym(s, len);
a->type = D_EXTERN;
a->sym = sym;
a->node = sym->def;
a->offset = widthptr+4; // skip header
a->etype = TINT32;
}
......@@ -324,6 +325,7 @@ datagostring(Strlit *sval, Addr *a)
sym = stringsym(sval->s, sval->len);
a->type = D_EXTERN;
a->sym = sym;
a->node = sym->def;
a->offset = 0; // header
a->etype = TINT32;
}
......
......@@ -964,6 +964,7 @@ nodarg(Type *t, int fp)
fatal("nodarg: offset not computed for %T", t);
n->xoffset = t->width;
n->addable = 1;
n->orig = t->nname;
break;
}
......@@ -1152,6 +1153,7 @@ memname(Node *n, Type *t)
strcpy(namebuf, n->sym->name);
namebuf[0] = '.'; // keep optimizer from registerizing
n->sym = lookup(namebuf);
n->orig->sym = n->sym;
}
void
......@@ -1828,6 +1830,7 @@ naddr(Node *n, Addr *a, int canemitcode)
a->offset = n->xoffset;
a->sym = n->left->sym;
a->type = D_PARAM;
a->node = n->left->orig;
break;
case ONAME:
......@@ -1840,6 +1843,9 @@ naddr(Node *n, Addr *a, int canemitcode)
}
a->offset = n->xoffset;
a->sym = n->sym;
a->node = n->orig;
//if(a->node >= (Node*)&n)
// fatal("stack node");
if(a->sym == S)
a->sym = lookup(".noname");
if(n->method) {
......@@ -1857,8 +1863,6 @@ naddr(Node *n, Addr *a, int canemitcode)
break;
case PAUTO:
a->type = D_AUTO;
if (n->sym)
a->node = n->orig;
break;
case PPARAM:
case PPARAMOUT:
......
......@@ -89,8 +89,8 @@ setoutvar(void)
ovar.b[z] |= bit.b[z];
t = structnext(&save);
}
//if(bany(b))
//print("ovars = %Q\n", &ovar);
//if(bany(ovar))
//print("ovars = %Q\n", ovar);
}
static void
......@@ -848,10 +848,12 @@ mkvar(Reg *r, Adr *a)
}
node = a->node;
if(node == N || node->op != ONAME || node->orig != N)
if(node == N || node->op != ONAME || node->orig == N)
goto none;
node = node->orig;
if(node->sym->name[0] == '.')
if(node->orig != node)
fatal("%D: bad node", a);
if(node->sym == S || node->sym->name[0] == '.')
goto none;
et = a->etype;
o = a->offset;
......@@ -1482,7 +1484,7 @@ dumpone(Reg *r)
if(bany(&r->refahead))
print(" ra:%Q ", r->refahead);
if(bany(&r->calbehind))
print("cb:%Q ", r->calbehind);
print(" cb:%Q ", r->calbehind);
if(bany(&r->calahead))
print(" ca:%Q ", r->calahead);
if(bany(&r->regdiff))
......
......@@ -151,9 +151,9 @@ Qconv(Fmt *fp)
else
fmtprint(fp, " ");
if(var[i].node == N || var[i].node->sym == S)
fmtprint(fp, "$%lld", var[i].offset);
fmtprint(fp, "$%lld", i);
else {
fmtprint(fp, var[i].node->sym->name);
fmtprint(fp, "%s", var[i].node->sym->name);
if(var[i].offset != 0)
fmtprint(fp, "%+lld", (vlong)var[i].offset);
}
......
......@@ -805,6 +805,7 @@ tempname(Node *nn, Type *t)
s = lookup(namebuf);
n = nod(ONAME, N, N);
n->sym = s;
s->def = n;
n->type = t;
n->class = PAUTO;
n->addable = 1;
......
......@@ -267,6 +267,7 @@ stringsym(char *s, int len)
if(sym->flags & SymUniq)
return sym;
sym->flags |= SymUniq;
sym->def = newname(sym);
off = 0;
......
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pkg
func NonASCII(b []byte, i int) int {
for i = 0; i < len(b); i++ {
if b[i] >= 0x80 {
break
}
}
return i
}
// $G -N -o slow.$A $D/bug369.dir/pkg.go &&
// $G -o fast.$A $D/bug369.dir/pkg.go &&
// $G $D/$F.go && $L $F.$A && ./$A.out
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Test that compiling with optimization turned on produces faster code.
package main
import (
"flag"
"os"
"runtime"
"testing"
fast "./fast"
slow "./slow"
)
var buf = make([]byte, 1048576)
func BenchmarkFastNonASCII(b *testing.B) {
for i := 0; i < b.N; i++ {
fast.NonASCII(buf, 0)
}
}
func BenchmarkSlowNonASCII(b *testing.B) {
for i := 0; i < b.N; i++ {
slow.NonASCII(buf, 0)
}
}
func main() {
os.Args = []string{os.Args[0], "-test.benchtime=0.1"}
flag.Parse()
rslow := testing.Benchmark(BenchmarkSlowNonASCII)
rfast := testing.Benchmark(BenchmarkFastNonASCII)
tslow := rslow.NsPerOp()
tfast := rfast.NsPerOp()
// Optimization should be good for at least 2x, but be forgiving.
// On the ARM simulator we see closer to 1.5x.
speedup := float64(tslow)/float64(tfast)
want := 1.8
if runtime.GOARCH == "arm" {
want = 1.3
}
if speedup < want {
println("fast:", tfast, "slow:", tslow, "speedup:", speedup, "want:", want)
println("not fast enough")
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment