Commit e2d326b8 authored by Russ Cox's avatar Russ Cox

5g, 6g, 8g: fix loop finding bug, squash jmps

The loop recognizer uses the standard dominance
frontiers but gets confused by dead code, which
has a (not explicitly set) rpo number of 0, meaning it
looks like the head of the function, so it dominates
everything.  If the loop recognizer encounters dead
code while tracking backward through the graph
it fails to recognize where it started as a loop, and
then the optimizer does not registerize values loaded
inside that loop.  Fix by checking rpo against rpo2r.

Separately, run a quick pass over the generated
code to squash JMPs to JMP instructions, which
are convenient to emit during code generation but
difficult to read when debugging the -S output.
A side effect of this pass is to eliminate dead code,
so the output files may be slightly smaller and the
optimizer may have less work to do.
There is no semantic effect, because the linkers
flatten JMP chains and delete dead instructions
when laying out the final code.  Doing it here too
just makes the -S output easier to read and more
like what the final binary will contain.

The "dead code breaks loop finding" bug is thus
fixed twice over.  It seemed prudent to fix loopit
separately just in case dead code ever sneaks back
in for one reason or another.

R=ken2
CC=golang-dev
https://golang.org/cl/5190043
parent da99a5bc
...@@ -42,6 +42,9 @@ ...@@ -42,6 +42,9 @@
int noreturn(Prog *p); int noreturn(Prog *p);
static int first = 0; static int first = 0;
static void fixjmp(Prog*);
Reg* Reg*
rega(void) rega(void)
{ {
...@@ -171,6 +174,8 @@ regopt(Prog *firstp) ...@@ -171,6 +174,8 @@ regopt(Prog *firstp)
if(first == 0) { if(first == 0) {
fmtinstall('Q', Qconv); fmtinstall('Q', Qconv);
} }
fixjmp(firstp);
first++; first++;
if(debug['K']) { if(debug['K']) {
...@@ -1159,10 +1164,12 @@ loopit(Reg *r, int32 nr) ...@@ -1159,10 +1164,12 @@ loopit(Reg *r, int32 nr)
r1 = rpo2r[i]; r1 = rpo2r[i];
me = r1->rpo; me = r1->rpo;
d = -1; d = -1;
if(r1->p1 != R && r1->p1->rpo < me) // rpo2r[r->rpo] == r protects against considering dead code,
// which has r->rpo == 0.
if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
d = r1->p1->rpo; d = r1->p1->rpo;
for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
if(r1->rpo < me) if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
d = rpolca(idom, d, r1->rpo); d = rpolca(idom, d, r1->rpo);
idom[i] = d; idom[i] = d;
} }
...@@ -1608,3 +1615,123 @@ dumpit(char *str, Reg *r0) ...@@ -1608,3 +1615,123 @@ dumpit(char *str, Reg *r0)
// } // }
} }
} }
/*
* the code generator depends on being able to write out JMP (B)
* instructions that it can jump to now but fill in later.
* the linker will resolve them nicely, but they make the code
* longer and more difficult to follow during debugging.
* remove them.
*/
/* what instruction does a JMP to p eventually land on? */
static Prog*
chasejmp(Prog *p, int *jmploop)
{
int n;
n = 0;
while(p != P && p->as == AB && p->to.type == D_BRANCH) {
if(++n > 10) {
*jmploop = 1;
break;
}
p = p->to.branch;
}
return p;
}
/*
* reuse reg pointer for mark/sweep state.
* leave reg==nil at end because alive==nil.
*/
#define alive ((void*)0)
#define dead ((void*)1)
/* mark all code reachable from firstp as alive */
static void
mark(Prog *firstp)
{
Prog *p;
for(p=firstp; p; p=p->link) {
if(p->regp != dead)
break;
p->regp = alive;
if(p->as != ABL && p->to.type == D_BRANCH && p->to.branch)
mark(p->to.branch);
if(p->as == AB || p->as == ARET || (p->as == ABL && noreturn(p)))
break;
}
}
static void
fixjmp(Prog *firstp)
{
int jmploop;
Prog *p, *last;
if(debug['R'] && debug['v'])
print("\nfixjmp\n");
// pass 1: resolve jump to B, mark all code as dead.
jmploop = 0;
for(p=firstp; p; p=p->link) {
if(debug['R'] && debug['v'])
print("%P\n", p);
if(p->as != ABL && p->to.type == D_BRANCH && p->to.branch && p->to.branch->as == AB) {
p->to.branch = chasejmp(p->to.branch, &jmploop);
if(debug['R'] && debug['v'])
print("->%P\n", p);
}
p->regp = dead;
}
if(debug['R'] && debug['v'])
print("\n");
// pass 2: mark all reachable code alive
mark(firstp);
// pass 3: delete dead code (mostly JMPs).
last = nil;
for(p=firstp; p; p=p->link) {
if(p->regp == dead) {
if(p->link == P && p->as == ARET && last && last->as != ARET) {
// This is the final ARET, and the code so far doesn't have one.
// Let it stay.
} else {
if(debug['R'] && debug['v'])
print("del %P\n", p);
continue;
}
}
if(last)
last->link = p;
last = p;
}
last->link = P;
// pass 4: elide JMP to next instruction.
// only safe if there are no jumps to JMPs anymore.
if(!jmploop) {
last = nil;
for(p=firstp; p; p=p->link) {
if(p->as == AB && p->to.type == D_BRANCH && p->to.branch == p->link) {
if(debug['R'] && debug['v'])
print("del %P\n", p);
continue;
}
if(last)
last->link = p;
last = p;
}
last->link = P;
}
if(debug['R'] && debug['v']) {
print("\n");
for(p=firstp; p; p=p->link)
print("%P\n", p);
print("\n");
}
}
...@@ -151,6 +151,8 @@ static char* regname[] = { ...@@ -151,6 +151,8 @@ static char* regname[] = {
".X15", ".X15",
}; };
static void fixjmp(Prog*);
void void
regopt(Prog *firstp) regopt(Prog *firstp)
{ {
...@@ -166,6 +168,8 @@ regopt(Prog *firstp) ...@@ -166,6 +168,8 @@ regopt(Prog *firstp)
first = 0; first = 0;
} }
fixjmp(firstp);
// count instructions // count instructions
nr = 0; nr = 0;
for(p=firstp; p!=P; p=p->link) for(p=firstp; p!=P; p=p->link)
...@@ -800,9 +804,9 @@ brk: ...@@ -800,9 +804,9 @@ brk:
if(ostats.ndelmov) if(ostats.ndelmov)
print(" %4d delmov\n", ostats.ndelmov); print(" %4d delmov\n", ostats.ndelmov);
if(ostats.nvar) if(ostats.nvar)
print(" %4d delmov\n", ostats.nvar); print(" %4d var\n", ostats.nvar);
if(ostats.naddr) if(ostats.naddr)
print(" %4d delmov\n", ostats.naddr); print(" %4d addr\n", ostats.naddr);
memset(&ostats, 0, sizeof(ostats)); memset(&ostats, 0, sizeof(ostats));
} }
...@@ -1217,10 +1221,12 @@ loopit(Reg *r, int32 nr) ...@@ -1217,10 +1221,12 @@ loopit(Reg *r, int32 nr)
r1 = rpo2r[i]; r1 = rpo2r[i];
me = r1->rpo; me = r1->rpo;
d = -1; d = -1;
if(r1->p1 != R && r1->p1->rpo < me) // rpo2r[r->rpo] == r protects against considering dead code,
// which has r->rpo == 0.
if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
d = r1->p1->rpo; d = r1->p1->rpo;
for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
if(r1->rpo < me) if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
d = rpolca(idom, d, r1->rpo); d = rpolca(idom, d, r1->rpo);
idom[i] = d; idom[i] = d;
} }
...@@ -1685,3 +1691,123 @@ noreturn(Prog *p) ...@@ -1685,3 +1691,123 @@ noreturn(Prog *p)
return 1; return 1;
return 0; return 0;
} }
/*
* the code generator depends on being able to write out JMP
* instructions that it can jump to now but fill in later.
* the linker will resolve them nicely, but they make the code
* longer and more difficult to follow during debugging.
* remove them.
*/
/* what instruction does a JMP to p eventually land on? */
static Prog*
chasejmp(Prog *p, int *jmploop)
{
int n;
n = 0;
while(p != P && p->as == AJMP && p->to.type == D_BRANCH) {
if(++n > 10) {
*jmploop = 1;
break;
}
p = p->to.branch;
}
return p;
}
/*
* reuse reg pointer for mark/sweep state.
* leave reg==nil at end because alive==nil.
*/
#define alive ((void*)0)
#define dead ((void*)1)
/* mark all code reachable from firstp as alive */
static void
mark(Prog *firstp)
{
Prog *p;
for(p=firstp; p; p=p->link) {
if(p->reg != dead)
break;
p->reg = alive;
if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch)
mark(p->to.branch);
if(p->as == AJMP || p->as == ARET || (p->as == ACALL && noreturn(p)))
break;
}
}
static void
fixjmp(Prog *firstp)
{
int jmploop;
Prog *p, *last;
if(debug['R'] && debug['v'])
print("\nfixjmp\n");
// pass 1: resolve jump to AJMP, mark all code as dead.
jmploop = 0;
for(p=firstp; p; p=p->link) {
if(debug['R'] && debug['v'])
print("%P\n", p);
if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch && p->to.branch->as == AJMP) {
p->to.branch = chasejmp(p->to.branch, &jmploop);
if(debug['R'] && debug['v'])
print("->%P\n", p);
}
p->reg = dead;
}
if(debug['R'] && debug['v'])
print("\n");
// pass 2: mark all reachable code alive
mark(firstp);
// pass 3: delete dead code (mostly JMPs).
last = nil;
for(p=firstp; p; p=p->link) {
if(p->reg == dead) {
if(p->link == P && p->as == ARET && last && last->as != ARET) {
// This is the final ARET, and the code so far doesn't have one.
// Let it stay.
} else {
if(debug['R'] && debug['v'])
print("del %P\n", p);
continue;
}
}
if(last)
last->link = p;
last = p;
}
last->link = P;
// pass 4: elide JMP to next instruction.
// only safe if there are no jumps to JMPs anymore.
if(!jmploop) {
last = nil;
for(p=firstp; p; p=p->link) {
if(p->as == AJMP && p->to.type == D_BRANCH && p->to.branch == p->link) {
if(debug['R'] && debug['v'])
print("del %P\n", p);
continue;
}
if(last)
last->link = p;
last = p;
}
last->link = P;
}
if(debug['R'] && debug['v']) {
print("\n");
for(p=firstp; p; p=p->link)
print("%P\n", p);
print("\n");
}
}
...@@ -39,6 +39,8 @@ ...@@ -39,6 +39,8 @@
static int first = 1; static int first = 1;
static void fixjmp(Prog*);
Reg* Reg*
rega(void) rega(void)
{ {
...@@ -132,6 +134,8 @@ regopt(Prog *firstp) ...@@ -132,6 +134,8 @@ regopt(Prog *firstp)
exregoffset = D_DI; // no externals exregoffset = D_DI; // no externals
first = 0; first = 0;
} }
fixjmp(firstp);
// count instructions // count instructions
nr = 0; nr = 0;
...@@ -694,9 +698,9 @@ brk: ...@@ -694,9 +698,9 @@ brk:
if(ostats.ndelmov) if(ostats.ndelmov)
print(" %4d delmov\n", ostats.ndelmov); print(" %4d delmov\n", ostats.ndelmov);
if(ostats.nvar) if(ostats.nvar)
print(" %4d delmov\n", ostats.nvar); print(" %4d var\n", ostats.nvar);
if(ostats.naddr) if(ostats.naddr)
print(" %4d delmov\n", ostats.naddr); print(" %4d addr\n", ostats.naddr);
memset(&ostats, 0, sizeof(ostats)); memset(&ostats, 0, sizeof(ostats));
} }
...@@ -1097,10 +1101,12 @@ loopit(Reg *r, int32 nr) ...@@ -1097,10 +1101,12 @@ loopit(Reg *r, int32 nr)
r1 = rpo2r[i]; r1 = rpo2r[i];
me = r1->rpo; me = r1->rpo;
d = -1; d = -1;
if(r1->p1 != R && r1->p1->rpo < me) // rpo2r[r->rpo] == r protects against considering dead code,
// which has r->rpo == 0.
if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
d = r1->p1->rpo; d = r1->p1->rpo;
for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
if(r1->rpo < me) if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
d = rpolca(idom, d, r1->rpo); d = rpolca(idom, d, r1->rpo);
idom[i] = d; idom[i] = d;
} }
...@@ -1544,3 +1550,123 @@ noreturn(Prog *p) ...@@ -1544,3 +1550,123 @@ noreturn(Prog *p)
return 1; return 1;
return 0; return 0;
} }
/*
* the code generator depends on being able to write out JMP
* instructions that it can jump to now but fill in later.
* the linker will resolve them nicely, but they make the code
* longer and more difficult to follow during debugging.
* remove them.
*/
/* what instruction does a JMP to p eventually land on? */
static Prog*
chasejmp(Prog *p, int *jmploop)
{
int n;
n = 0;
while(p != P && p->as == AJMP && p->to.type == D_BRANCH) {
if(++n > 10) {
*jmploop = 1;
break;
}
p = p->to.branch;
}
return p;
}
/*
* reuse reg pointer for mark/sweep state.
* leave reg==nil at end because alive==nil.
*/
#define alive ((void*)0)
#define dead ((void*)1)
/* mark all code reachable from firstp as alive */
static void
mark(Prog *firstp)
{
Prog *p;
for(p=firstp; p; p=p->link) {
if(p->reg != dead)
break;
p->reg = alive;
if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch)
mark(p->to.branch);
if(p->as == AJMP || p->as == ARET || (p->as == ACALL && noreturn(p)))
break;
}
}
static void
fixjmp(Prog *firstp)
{
int jmploop;
Prog *p, *last;
if(debug['R'] && debug['v'])
print("\nfixjmp\n");
// pass 1: resolve jump to AJMP, mark all code as dead.
jmploop = 0;
for(p=firstp; p; p=p->link) {
if(debug['R'] && debug['v'])
print("%P\n", p);
if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch && p->to.branch->as == AJMP) {
p->to.branch = chasejmp(p->to.branch, &jmploop);
if(debug['R'] && debug['v'])
print("->%P\n", p);
}
p->reg = dead;
}
if(debug['R'] && debug['v'])
print("\n");
// pass 2: mark all reachable code alive
mark(firstp);
// pass 3: delete dead code (mostly JMPs).
last = nil;
for(p=firstp; p; p=p->link) {
if(p->reg == dead) {
if(p->link == P && p->as == ARET && last && last->as != ARET) {
// This is the final ARET, and the code so far doesn't have one.
// Let it stay.
} else {
if(debug['R'] && debug['v'])
print("del %P\n", p);
continue;
}
}
if(last)
last->link = p;
last = p;
}
last->link = P;
// pass 4: elide JMP to next instruction.
// only safe if there are no jumps to JMPs anymore.
if(!jmploop) {
last = nil;
for(p=firstp; p; p=p->link) {
if(p->as == AJMP && p->to.type == D_BRANCH && p->to.branch == p->link) {
if(debug['R'] && debug['v'])
print("del %P\n", p);
continue;
}
if(last)
last->link = p;
last = p;
}
last->link = P;
}
if(debug['R'] && debug['v']) {
print("\n");
for(p=firstp; p; p=p->link)
print("%P\n", p);
print("\n");
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment