Commit 9c204852 authored by Russ Cox

6l: function at a time code layout

Also change the span-dependent jump algorithm
to use fewer iterations:

* resolve forward jumps at their targets (comefrom list)
* mark jumps as small or big and only do small->big
* record whether a jump failed to be encodable

These changes mean that a function with only small
jumps can be laid out in a single iteration, and the
vast majority of functions take just two iterations.
I was seeing a maximum of 5 iterations before; the
max now is 3 and there are fewer that get even that far.

R=ken2
CC=golang-dev
https://golang.org/cl/2537041
parent 837c204a
...@@ -246,6 +246,7 @@ enum as ...@@ -246,6 +246,7 @@ enum as
/* internal only */ /* internal only */
#define D_SIZE (D_NONE+40) #define D_SIZE (D_NONE+40)
#define D_PCREL (D_NONE+41)
/* /*
* this is the ranlib header * this is the ranlib header
......
...@@ -824,6 +824,7 @@ enum ...@@ -824,6 +824,7 @@ enum
D_INDIR, /* additive */ D_INDIR, /* additive */
D_SIZE = D_INDIR + D_INDIR, /* 6l internal */ D_SIZE = D_INDIR + D_INDIR, /* 6l internal */
D_PCREL,
T_TYPE = 1<<0, T_TYPE = 1<<0,
T_INDEX = 1<<1, T_INDEX = 1<<1,
......
...@@ -344,10 +344,8 @@ phsh(ElfPhdr *ph, ElfShdr *sh) ...@@ -344,10 +344,8 @@ phsh(ElfPhdr *ph, ElfShdr *sh)
void void
asmb(void) asmb(void)
{ {
Prog *p;
int32 v, magic; int32 v, magic;
int a, dynsym; int a, dynsym;
uchar *op1;
vlong vl, va, startva, fo, w, symo, elfsymo, elfstro, elfsymsize, machlink; vlong vl, va, startva, fo, w, symo, elfsymo, elfstro, elfsymsize, machlink;
vlong symdatva = SYMDATVA; vlong symdatva = SYMDATVA;
ElfEhdr *eh; ElfEhdr *eh;
...@@ -366,35 +364,8 @@ asmb(void) ...@@ -366,35 +364,8 @@ asmb(void)
elfsymo = 0; elfsymo = 0;
seek(cout, HEADR, 0); seek(cout, HEADR, 0);
pc = INITTEXT; pc = INITTEXT;
codeblk(pc, segtext.sect->len);
for(cursym = textp; cursym != nil; cursym = cursym->next) { pc += segtext.sect->len;
for(p = cursym->text; p != P; p = p->link) {
if(p->pc != pc) {
if(!debug['a'])
print("%P\n", curp);
diag("phase error %llux sb %llux in %s", p->pc, pc, TNAME);
pc = p->pc;
}
curp = p;
asmins(p);
a = (andptr - and);
if(cbc < a)
cflush();
if(debug['a']) {
Bprint(&bso, pcstr, pc);
for(op1 = and; op1 < andptr; op1++)
Bprint(&bso, "%.2ux", *op1);
for(; op1 < and+Maxand; op1++)
Bprint(&bso, " ");
Bprint(&bso, "%P\n", curp);
}
memmove(cbp, and, a);
cbp += a;
pc += a;
cbc -= a;
}
}
cflush();
/* output read-only data in text segment */ /* output read-only data in text segment */
sect = segtext.sect->next; sect = segtext.sect->next;
......
...@@ -94,8 +94,8 @@ struct Prog ...@@ -94,8 +94,8 @@ struct Prog
Adr from; Adr from;
Adr to; Adr to;
Prog* forwd; Prog* forwd;
Prog* comefrom;
Prog* link; Prog* link;
Prog* dlink;
Prog* pcond; /* work on this */ Prog* pcond; /* work on this */
vlong pc; vlong pc;
int32 spadj; int32 spadj;
......
...@@ -35,23 +35,127 @@ ...@@ -35,23 +35,127 @@
static int rexflag; static int rexflag;
static int asmode; static int asmode;
static vlong vaddr(Adr*, Reloc*);
/*
 * span1 lays out the machine code for the single function symbol s.
 * Every instruction on s->text is assembled with asmins, its bytes are
 * copied into s->p, and p->pc is set to the instruction's offset from the
 * start of the function (the layout counter c starts at 0).  The whole
 * layout is repeated until no short forward jump had to be widened.
 * On return s->size holds the total number of bytes laid out.
 */
void
span1(Sym *s)
{
Prog *p, *q;
int32 c, v, loop;
uchar *bp;
int n, m, i;
cursym = s;
/*
 * Setup pass: classify every instruction's jump state in p->back and
 * rewrite the AADJSP pseudo-instruction into a real add/sub on D_SP.
 */
for(p = s->text; p != P; p = p->link) {
p->back = 2; // use short branches first time through
// bit 2 already set on the target means this loop has visited it,
// i.e. the target precedes p in the list.
if((q = p->pcond) != P && (q->back & 2))
p->back |= 1; // backward jump
if(p->as == AADJSP) {
p->to.type = D_SP;
// negate the offset; the 32- or 64-bit opcode is chosen from p->mode
v = -p->from.offset;
p->from.offset = v;
p->as = p->mode != 64? AADDL: AADDQ;
if(v < 0) {
// negative amount: emit a subtract of the absolute value instead
p->as = p->mode != 64? ASUBL: ASUBQ;
v = -v;
p->from.offset = v;
}
if(v == 0)
p->as = ANOP;
}
}
n = 0; // number of layout iterations performed so far
do {
loop = 0; // incremented whenever a short jump must be widened
// discard relocations and bytes recorded by the previous iteration
memset(s->r, 0, s->nr*sizeof s->r[0]);
s->nr = 0;
s->np = 0;
c = 0; // running offset within the function
for(p = s->text; p != P; p = p->link) {
p->pc = c;
// process forward jumps to p
// (the list is headed by p->comefrom and chained through q->forwd)
for(q = p->comefrom; q != P; q = q->forwd) {
// q->mark is q's encoded length (stored below), so q->pc + q->mark
// is the offset just past the jump instruction.
v = p->pc - (q->pc + q->mark);
if(q->back & 2) { // short
if(v > 127) {
// displacement exceeds 127: clear the short bit
// and request another iteration.
loop++;
q->back ^= 2;
}
// store the displacement in the second byte of the jump
s->p[q->pc+1] = v;
} else {
// long form: patch the last four bytes of the jump,
// least significant byte first.
bp = s->p + q->pc + q->mark - 4;
*bp++ = v;
*bp++ = v>>8;
*bp++ = v>>16;
*bp++ = v>>24;
}
}
p->comefrom = P;
asmins(p);
// NOTE(review): pc is assigned again after asmins; presumably a
// guard against asmins disturbing it - confirm.
p->pc = c;
// append the bytes assembled into the and[] buffer to the symbol's data
m = andptr-and;
symgrow(s, p->pc+m);
memmove(s->p+p->pc, and, m);
p->mark = m;
c += m;
}
// give up if the layout has not settled after 20 iterations
if(++n > 20) {
diag("span must be looping");
errorexit();
}
} while(loop);
s->size = c;
/* with debug['a'] > 1, dump the laid-out bytes and the relocations */
if(debug['a'] > 1) {
print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
for(i=0; i<s->np; i++) {
print(" %.2ux", s->p[i]);
// start a new line, prefixed with the next offset, every 16 bytes
if(i%16 == 15)
print("\n %.6ux", i+1);
}
if(i%16)
print("\n");
for(i=0; i<s->nr; i++) {
Reloc *r;
r = &s->r[i];
print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
}
}
}
void void
span(void) span(void)
{ {
Prog *p, *q; Prog *p, *q;
int32 v; int32 v;
vlong c, idat, etext, rosize; vlong c;
int m, n, again; int n;
Sym *s;
Section *sect, *rosect; Section *sect, *rosect;
Sym *sym;
if(debug['v'])
Bprint(&bso, "%5.2f span\n", cputime());
segtext.rwx = 05;
segtext.vaddr = INITTEXT - HEADR;
xdefine("etext", STEXT, 0L); xdefine("etext", STEXT, 0L);
xdefine("rodata", SRODATA, 0L); xdefine("rodata", SRODATA, 0L);
xdefine("erodata", SRODATA, 0L); xdefine("erodata", SRODATA, 0L);
idat = INITDAT; // NOTE(rsc): If we get rid of the globals we should
// be able to parallelize these iterations.
for(cursym = textp; cursym != nil; cursym = cursym->next) { for(cursym = textp; cursym != nil; cursym = cursym->next) {
if(!cursym->reachable)
continue;
for(p = cursym->text; p != P; p = p->link) { for(p = cursym->text; p != P; p = p->link) {
n = 0; n = 0;
if(p->to.type == D_BRANCH) if(p->to.type == D_BRANCH)
...@@ -75,98 +179,46 @@ span(void) ...@@ -75,98 +179,46 @@ span(void)
p->as = ANOP; p->as = ANOP;
} }
} }
span1(cursym);
} }
n = 0;
rosect = segtext.sect->next;
rosize = rosect->len;
start: // Next, loop over symbols to assign actual PCs.
if(debug['v']) // Could parallelize here too, by assigning to text
Bprint(&bso, "%5.2f span\n", cputime()); // and then letting threads copy down, but probably not worth it.
Bflush(&bso);
c = INITTEXT; c = INITTEXT;
sect = segtext.sect;
sect->vaddr = c;
for(cursym = textp; cursym != nil; cursym = cursym->next) { for(cursym = textp; cursym != nil; cursym = cursym->next) {
for(p = cursym->text; p != P; p = p->link) { if(!cursym->reachable)
if(p->to.type == D_BRANCH) continue;
if(p->back) cursym->value = c;
p->pc = c; for(p = cursym->text; p != P; p = p->link)
asmins(p); p->pc += c;
p->pc = c; c += cursym->size;
m = andptr-and; }
p->mark = m; sect->len = c - sect->vaddr;
c += m; xdefine("etext", STEXT, c);
}
}
loop:
n++;
if(debug['v']) if(debug['v'])
Bprint(&bso, "%5.2f span %d\n", cputime(), n); Bprint(&bso, "etext = %llux\n", c);
Bflush(&bso);
if(n > 50) {
print("span must be looping\n");
errorexit();
}
again = 0;
c = INITTEXT;
for(cursym = textp; cursym != nil; cursym = cursym->next) {
for(p = cursym->text; p != P; p = p->link) {
if(p->to.type == D_BRANCH || p->back & 0100) {
if(p->back)
p->pc = c;
asmins(p);
m = andptr-and;
if(m != p->mark) {
p->mark = m;
again++;
}
}
p->pc = c;
c += p->mark;
}
}
if(again) {
textsize = c;
goto loop;
}
etext = c;
if(rosect) { xdefine("rodata", SRODATA, c);
if(INITRND) if(INITRND)
c = rnd(c, INITRND); c = rnd(c, INITRND);
if(rosect->vaddr != c){ rosect = segtext.sect->next;
rosect->vaddr = c; rosect->vaddr = c;
goto start;
}
c += rosect->len; c += rosect->len;
} xdefine("erodata", SRODATA, c);
textsize = c - INITTEXT;
if(INITRND) {
INITDAT = rnd(c, INITRND);
if(INITDAT != idat) {
idat = INITDAT;
goto start;
}
}
xdefine("etext", STEXT, etext);
if(debug['v']) if(debug['v'])
Bprint(&bso, "etext = %llux\n", c); Bprint(&bso, "erodata = %llux", c);
Bflush(&bso); Bflush(&bso);
for(cursym = textp; cursym != nil; cursym = cursym->next)
cursym->value = cursym->text->pc;
textsize = c - INITTEXT;
segtext.rwx = 05; segtext.len = c - segtext.vaddr;
segtext.vaddr = INITTEXT - HEADR; segtext.filelen = segtext.len;
segtext.len = INITDAT - INITTEXT + HEADR;
segtext.filelen = textsize + HEADR;
sect = segtext.sect; if(INITRND)
sect->vaddr = INITTEXT; c = rnd(c, INITRND);
sect->len = etext - sect->vaddr; INITDAT = c;
// Adjust everything now that we know INITDAT. // Adjust everything now that we know INITDAT.
// This will get simpler when everything is relocatable // This will get simpler when everything is relocatable
...@@ -180,15 +232,15 @@ loop: ...@@ -180,15 +232,15 @@ loop:
xdefine("edata", SBSS, INITDAT+segdata.filelen); xdefine("edata", SBSS, INITDAT+segdata.filelen);
xdefine("end", SBSS, INITDAT+segdata.len); xdefine("end", SBSS, INITDAT+segdata.len);
for(sym=datap; sym!=nil; sym=sym->next) { for(s=datap; s!=nil; s=s->next) {
switch(sym->type) { switch(s->type) {
case SELFDATA: case SELFDATA:
case SRODATA: case SRODATA:
sym->value += rosect->vaddr; s->value += rosect->vaddr;
break; break;
case SDATA: case SDATA:
case SBSS: case SBSS:
sym->value += INITDAT; s->value += INITDAT;
break; break;
} }
} }
...@@ -624,6 +676,23 @@ put4(int32 v) ...@@ -624,6 +676,23 @@ put4(int32 v)
andptr += 4; andptr += 4;
} }
/*
 * relput4 emits the 4-byte value of address operand a for instruction p.
 * vaddr computes the value and fills in a relocation template; when the
 * template is non-empty (siz != 0) it is copied into a new relocation on
 * cursym whose offset is the position where the 4 bytes are about to be
 * written (p->pc plus the bytes already assembled in the and[] buffer).
 * A template whose size is not 4 is reported with diag but still recorded.
 */
static void
relput4(Prog *p, Adr *a)
{
	Reloc tmpl, *slot;
	vlong val;

	val = vaddr(a, &tmpl);
	if(tmpl.siz == 0) {
		/* no relocation needed: just emit the value */
		put4(val);
		return;
	}
	if(tmpl.siz != 4)
		diag("bad reloc");
	slot = addrel(cursym);
	*slot = tmpl;
	slot->off = p->pc + andptr - and;
	put4(val);
}
static void static void
put8(vlong v) put8(vlong v)
{ {
...@@ -638,27 +707,51 @@ put8(vlong v) ...@@ -638,27 +707,51 @@ put8(vlong v)
andptr += 8; andptr += 8;
} }
static vlong vaddr(Adr*); /*
static void
relput8(Prog *p, Adr *a)
{
vlong v;
Reloc rel, *r;
v = vaddr(a, &rel);
if(rel.siz != 0) {
r = addrel(cursym);
*r = rel;
r->siz = 8;
r->off = p->pc + andptr - and;
}
put8(v);
}
*/
vlong vlong
symaddr(Sym *s) symaddr(Sym *s)
{ {
Adr a; switch(s->type) {
case SFIXED:
return s->value;
a.type = D_ADDR; case SMACHO:
a.index = D_EXTERN; return INITDAT + segdata.filelen - dynptrsize + s->value;
a.offset = 0;
a.sym = s; default:
return vaddr(&a); if(!s->reachable)
diag("unreachable symbol in symaddr - %s", s->name);
return s->value;
}
} }
static vlong static vlong
vaddr(Adr *a) vaddr(Adr *a, Reloc *r)
{ {
int t; int t;
vlong v; vlong v;
Sym *s; Sym *s;
if(r != nil)
memset(r, 0, sizeof *r);
t = a->type; t = a->type;
v = a->offset; v = a->offset;
if(t == D_ADDR) if(t == D_ADDR)
...@@ -667,19 +760,24 @@ vaddr(Adr *a) ...@@ -667,19 +760,24 @@ vaddr(Adr *a)
case D_STATIC: case D_STATIC:
case D_EXTERN: case D_EXTERN:
s = a->sym; s = a->sym;
if(s != nil) {
switch(s->type) { switch(s->type) {
case SFIXED: case SFIXED:
v += s->value; v += s->value;
break; break;
case SMACHO:
v += INITDAT + segdata.filelen - dynptrsize + s->value;
break;
default: default:
if(!s->reachable) if(!s->reachable)
diag("unreachable symbol in vaddr - %s", s->name); diag("unreachable symbol in vaddr - %s", s->name);
v += s->value; if(r == nil) {
diag("need reloc for %D", a);
errorexit();
} }
r->type = D_ADDR;
r->siz = 4; // TODO: 8 for external symbols
r->off = -1; // caller must fill in
r->sym = s;
r->add = v;
v = 0;
break;
} }
} }
return v; return v;
...@@ -690,10 +788,12 @@ asmandsz(Adr *a, int r, int rex, int m64) ...@@ -690,10 +788,12 @@ asmandsz(Adr *a, int r, int rex, int m64)
{ {
int32 v; int32 v;
int t, scale; int t, scale;
Reloc rel;
rex &= (0x40 | Rxr); rex &= (0x40 | Rxr);
v = a->offset; v = a->offset;
t = a->type; t = a->type;
rel.siz = 0;
if(a->index != D_NONE) { if(a->index != D_NONE) {
if(t < D_INDIR) { if(t < D_INDIR) {
switch(t) { switch(t) {
...@@ -702,7 +802,7 @@ asmandsz(Adr *a, int r, int rex, int m64) ...@@ -702,7 +802,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
case D_STATIC: case D_STATIC:
case D_EXTERN: case D_EXTERN:
t = D_NONE; t = D_NONE;
v = vaddr(a); v = vaddr(a, &rel);
break; break;
case D_AUTO: case D_AUTO:
case D_PARAM: case D_PARAM:
...@@ -715,15 +815,15 @@ asmandsz(Adr *a, int r, int rex, int m64) ...@@ -715,15 +815,15 @@ asmandsz(Adr *a, int r, int rex, int m64)
if(t == D_NONE) { if(t == D_NONE) {
*andptr++ = (0 << 6) | (4 << 0) | (r << 3); *andptr++ = (0 << 6) | (4 << 0) | (r << 3);
asmidx(a->scale, a->index, t); asmidx(a->scale, a->index, t);
put4(v); goto putrelv;
return; return;
} }
if(v == 0 && t != D_BP && t != D_R13) { if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
*andptr++ = (0 << 6) | (4 << 0) | (r << 3); *andptr++ = (0 << 6) | (4 << 0) | (r << 3);
asmidx(a->scale, a->index, t); asmidx(a->scale, a->index, t);
return; return;
} }
if(v >= -128 && v < 128) { if(v >= -128 && v < 128 && rel.siz == 0) {
*andptr++ = (1 << 6) | (4 << 0) | (r << 3); *andptr++ = (1 << 6) | (4 << 0) | (r << 3);
asmidx(a->scale, a->index, t); asmidx(a->scale, a->index, t);
*andptr++ = v; *andptr++ = v;
...@@ -731,8 +831,7 @@ asmandsz(Adr *a, int r, int rex, int m64) ...@@ -731,8 +831,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
} }
*andptr++ = (2 << 6) | (4 << 0) | (r << 3); *andptr++ = (2 << 6) | (4 << 0) | (r << 3);
asmidx(a->scale, a->index, t); asmidx(a->scale, a->index, t);
put4(v); goto putrelv;
return;
} }
if(t >= D_AL && t <= D_X0+15) { if(t >= D_AL && t <= D_X0+15) {
if(v) if(v)
...@@ -750,7 +849,7 @@ asmandsz(Adr *a, int r, int rex, int m64) ...@@ -750,7 +849,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
case D_STATIC: case D_STATIC:
case D_EXTERN: case D_EXTERN:
t = D_NONE; t = D_NONE;
v = vaddr(a); v = vaddr(a, &rel);
break; break;
case D_AUTO: case D_AUTO:
case D_PARAM: case D_PARAM:
...@@ -765,14 +864,12 @@ asmandsz(Adr *a, int r, int rex, int m64) ...@@ -765,14 +864,12 @@ asmandsz(Adr *a, int r, int rex, int m64)
if(t == D_NONE || (D_CS <= t && t <= D_GS)) { if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
if(asmode != 64){ if(asmode != 64){
*andptr++ = (0 << 6) | (5 << 0) | (r << 3); *andptr++ = (0 << 6) | (5 << 0) | (r << 3);
put4(v); goto putrelv;
return;
} }
/* temporary */ /* temporary */
*andptr++ = (0 << 6) | (4 << 0) | (r << 3); /* sib present */ *andptr++ = (0 << 6) | (4 << 0) | (r << 3); /* sib present */
*andptr++ = (0 << 6) | (4 << 3) | (5 << 0); /* DS:d32 */ *andptr++ = (0 << 6) | (4 << 3) | (5 << 0); /* DS:d32 */
put4(v); goto putrelv;
return;
} }
if(t == D_SP || t == D_R12) { if(t == D_SP || t == D_R12) {
if(v == 0) { if(v == 0) {
...@@ -788,8 +885,7 @@ asmandsz(Adr *a, int r, int rex, int m64) ...@@ -788,8 +885,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
} }
*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3); *andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
asmidx(scale, D_NONE, t); asmidx(scale, D_NONE, t);
put4(v); goto putrelv;
return;
} }
if(t >= D_AX && t <= D_R15) { if(t >= D_AX && t <= D_R15) {
if(v == 0 && t != D_BP && t != D_R13) { if(v == 0 && t != D_BP && t != D_R13) {
...@@ -803,9 +899,24 @@ asmandsz(Adr *a, int r, int rex, int m64) ...@@ -803,9 +899,24 @@ asmandsz(Adr *a, int r, int rex, int m64)
return; return;
} }
*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3); *andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
goto putrelv;
}
goto bad;
putrelv:
if(rel.siz != 0) {
Reloc *r;
if(rel.siz != 4) {
diag("bad rel");
goto bad;
}
r = addrel(cursym);
*r = rel;
r->off = curp->pc + andptr - and;
}
put4(v); put4(v);
return; return;
}
bad: bad:
diag("asmand: bad address %D", a); diag("asmand: bad address %D", a);
...@@ -1040,6 +1151,10 @@ doasm(Prog *p) ...@@ -1040,6 +1151,10 @@ doasm(Prog *p)
Movtab *mo; Movtab *mo;
int z, op, ft, tt, xo, l, pre; int z, op, ft, tt, xo, l, pre;
vlong v; vlong v;
Reloc rel, *r;
Adr *a;
curp = p; // TODO
o = opindex[p->as]; o = opindex[p->as];
if(o == nil) { if(o == nil) {
...@@ -1116,7 +1231,7 @@ found: ...@@ -1116,7 +1231,7 @@ found:
diag("asmins: illegal in %d-bit mode: %P", p->mode, p); diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
break; break;
} }
v = vaddr(&p->from);
op = o->op[z]; op = o->op[z];
if(op == 0x0f) { if(op == 0x0f) {
*andptr++ = op; *andptr++ = op;
...@@ -1222,64 +1337,74 @@ found: ...@@ -1222,64 +1337,74 @@ found:
break; break;
case Zm_ibo: case Zm_ibo:
v = vaddr(&p->to);
*andptr++ = op; *andptr++ = op;
asmando(&p->from, o->op[z+1]); asmando(&p->from, o->op[z+1]);
*andptr++ = v; *andptr++ = vaddr(&p->to, nil);
break; break;
case Zibo_m: case Zibo_m:
*andptr++ = op; *andptr++ = op;
asmando(&p->to, o->op[z+1]); asmando(&p->to, o->op[z+1]);
*andptr++ = v; *andptr++ = vaddr(&p->from, nil);
break; break;
case Zibo_m_xm: case Zibo_m_xm:
z = mediaop(o, op, t[3], z); z = mediaop(o, op, t[3], z);
asmando(&p->to, o->op[z+1]); asmando(&p->to, o->op[z+1]);
*andptr++ = v; *andptr++ = vaddr(&p->from, nil);
break; break;
case Z_ib: case Z_ib:
v = vaddr(&p->to);
case Zib_: case Zib_:
if(t[2] == Zib_)
a = &p->from;
else
a = &p->to;
*andptr++ = op; *andptr++ = op;
*andptr++ = v; *andptr++ = vaddr(a, nil);
break; break;
case Zib_rp: case Zib_rp:
rexflag |= regrex[p->to.type] & (Rxb|0x40); rexflag |= regrex[p->to.type] & (Rxb|0x40);
*andptr++ = op + reg[p->to.type]; *andptr++ = op + reg[p->to.type];
*andptr++ = v; *andptr++ = vaddr(&p->from, nil);
break; break;
case Zil_rp: case Zil_rp:
rexflag |= regrex[p->to.type] & Rxb; rexflag |= regrex[p->to.type] & Rxb;
*andptr++ = op + reg[p->to.type]; *andptr++ = op + reg[p->to.type];
if(o->prefix == Pe) { if(o->prefix == Pe) {
v = vaddr(&p->from, nil);
*andptr++ = v; *andptr++ = v;
*andptr++ = v>>8; *andptr++ = v>>8;
} }
else else
put4(v); relput4(p, &p->from);
break; break;
case Zo_iw: case Zo_iw:
*andptr++ = op; *andptr++ = op;
if(p->from.type != D_NONE){ if(p->from.type != D_NONE){
v = vaddr(&p->from, nil);
*andptr++ = v; *andptr++ = v;
*andptr++ = v>>8; *andptr++ = v>>8;
} }
break; break;
case Ziq_rp: case Ziq_rp:
v = vaddr(&p->from, &rel);
l = v>>32; l = v>>32;
if(l == 0){ if(l == 0 && rel.siz != 8){
//p->mark |= 0100; //p->mark |= 0100;
//print("zero: %llux %P\n", v, p); //print("zero: %llux %P\n", v, p);
rexflag &= ~(0x40|Rxw); rexflag &= ~(0x40|Rxw);
rexflag |= regrex[p->to.type] & Rxb; rexflag |= regrex[p->to.type] & Rxb;
*andptr++ = 0xb8 + reg[p->to.type]; *andptr++ = 0xb8 + reg[p->to.type];
if(rel.type != 0) {
r = addrel(cursym);
*r = rel;
r->off = p->pc + andptr - and;
}
put4(v); put4(v);
}else if(l == -1 && (v&((uvlong)1<<31))!=0){ /* sign extend */ }else if(l == -1 && (v&((uvlong)1<<31))!=0){ /* sign extend */
//p->mark |= 0100; //p->mark |= 0100;
...@@ -1291,6 +1416,11 @@ found: ...@@ -1291,6 +1416,11 @@ found:
//print("all: %llux %P\n", v, p); //print("all: %llux %P\n", v, p);
rexflag |= regrex[p->to.type] & Rxb; rexflag |= regrex[p->to.type] & Rxb;
*andptr++ = op + reg[p->to.type]; *andptr++ = op + reg[p->to.type];
if(rel.type != 0) {
r = addrel(cursym);
*r = rel;
r->off = p->pc + andptr - and;
}
put8(v); put8(v);
} }
break; break;
...@@ -1298,53 +1428,54 @@ found: ...@@ -1298,53 +1428,54 @@ found:
case Zib_rr: case Zib_rr:
*andptr++ = op; *andptr++ = op;
asmand(&p->to, &p->to); asmand(&p->to, &p->to);
*andptr++ = v; *andptr++ = vaddr(&p->from, nil);
break; break;
case Z_il: case Z_il:
v = vaddr(&p->to);
case Zil_: case Zil_:
*andptr++ = op; if(t[2] == Zil_)
if(o->prefix == Pe) { a = &p->from;
*andptr++ = v;
*andptr++ = v>>8;
}
else else
put4(v); a = &p->to;
break;
case Zm_ilo:
v = vaddr(&p->to);
*andptr++ = op; *andptr++ = op;
asmando(&p->from, o->op[z+1]);
if(o->prefix == Pe) { if(o->prefix == Pe) {
v = vaddr(a, nil);
*andptr++ = v; *andptr++ = v;
*andptr++ = v>>8; *andptr++ = v>>8;
} }
else else
put4(v); relput4(p, a);
break; break;
case Zm_ilo:
case Zilo_m: case Zilo_m:
*andptr++ = op; *andptr++ = op;
if(t[2] == Zilo_m) {
a = &p->from;
asmando(&p->to, o->op[z+1]); asmando(&p->to, o->op[z+1]);
} else {
a = &p->to;
asmando(&p->from, o->op[z+1]);
}
if(o->prefix == Pe) { if(o->prefix == Pe) {
v = vaddr(a, nil);
*andptr++ = v; *andptr++ = v;
*andptr++ = v>>8; *andptr++ = v>>8;
} }
else else
put4(v); relput4(p, a);
break; break;
case Zil_rr: case Zil_rr:
*andptr++ = op; *andptr++ = op;
asmand(&p->to, &p->to); asmand(&p->to, &p->to);
if(o->prefix == Pe) { if(o->prefix == Pe) {
v = vaddr(&p->from, nil);
*andptr++ = v; *andptr++ = v;
*andptr++ = v>>8; *andptr++ = v>>8;
} }
else else
put4(v); relput4(p, &p->from);
break; break;
case Z_rp: case Z_rp:
...@@ -1362,67 +1493,132 @@ found: ...@@ -1362,67 +1493,132 @@ found:
asmand(&p->to, &p->to); asmand(&p->to, &p->to);
break; break;
case Zcall:
q = p->pcond;
if(q == nil) {
diag("call without target");
errorexit();
}
if(q->as != ATEXT) {
// Could handle this case by making D_PCREL
// record the Prog* instead of the Sym*, but let's
// wait until the need arises.
diag("call of non-TEXT");
errorexit();
}
*andptr++ = op;
r = addrel(cursym);
r->off = p->pc + andptr - and;
r->sym = q->from.sym;
r->type = D_PCREL;
r->siz = 4;
put4(0);
break;
case Zbr: case Zbr:
case Zjmp:
// TODO: jump across functions needs reloc
q = p->pcond; q = p->pcond;
if(q) { if(q == nil) {
v = q->pc - p->pc - 2; diag("jmp/branch without target");
if(v >= -128 && v <= 127) { errorexit();
}
if(q->as == ATEXT) {
if(t[2] == Zbr) {
diag("branch to ATEXT");
errorexit();
}
*andptr++ = o->op[z+1];
r = addrel(cursym);
r->off = p->pc + andptr - and;
r->sym = q->from.sym;
r->type = D_PCREL;
r->siz = 4;
put4(0);
break;
}
// Assumes q is in this function.
// TODO: Check in input, preserve in brchain.
// Fill in backward jump now.
if(p->back & 1) {
v = q->pc - (p->pc + 2);
if(v >= -128) {
*andptr++ = op; *andptr++ = op;
*andptr++ = v; *andptr++ = v;
} else { } else {
v -= 6-2; v -= 5-2;
if(t[2] == Zbr) {
*andptr++ = 0x0f; *andptr++ = 0x0f;
v--;
}
*andptr++ = o->op[z+1]; *andptr++ = o->op[z+1];
*andptr++ = v; *andptr++ = v;
*andptr++ = v>>8; *andptr++ = v>>8;
*andptr++ = v>>16; *andptr++ = v>>16;
*andptr++ = v>>24; *andptr++ = v>>24;
} }
}
break; break;
}
case Zcall: // Annotate target; will fill in later.
q = p->pcond; p->forwd = q->comefrom;
if(q) { q->comefrom = p;
v = q->pc - p->pc - 5; if(p->back & 2) { // short
*andptr++ = op; *andptr++ = op;
*andptr++ = v; *andptr++ = 0;
*andptr++ = v>>8; } else {
*andptr++ = v>>16; if(t[2] == Zbr)
*andptr++ = v>>24; *andptr++ = 0x0f;
*andptr++ = o->op[z+1];
*andptr++ = 0;
*andptr++ = 0;
*andptr++ = 0;
*andptr++ = 0;
} }
break; break;
case Zjmp: /*
q = p->pcond;
if(q) {
v = q->pc - p->pc - 2; v = q->pc - p->pc - 2;
if(v >= -128 && v <= 127) { if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
*andptr++ = op; *andptr++ = op;
*andptr++ = v; *andptr++ = v;
} else { } else {
v -= 5-2; v -= 5-2;
if(t[2] == Zbr) {
*andptr++ = 0x0f;
v--;
}
*andptr++ = o->op[z+1]; *andptr++ = o->op[z+1];
*andptr++ = v; *andptr++ = v;
*andptr++ = v>>8; *andptr++ = v>>8;
*andptr++ = v>>16; *andptr++ = v>>16;
*andptr++ = v>>24; *andptr++ = v>>24;
} }
} */
break; break;
case Zloop: case Zloop:
q = p->pcond; q = p->pcond;
if(q) { if(q == nil) {
diag("loop without target");
errorexit();
}
v = q->pc - p->pc - 2; v = q->pc - p->pc - 2;
if(v < -128 && v > 127) if(v < -128 && v > 127)
diag("loop too far: %P", p); diag("loop too far: %P", p);
*andptr++ = op; *andptr++ = op;
*andptr++ = v; *andptr++ = v;
}
break; break;
case Zbyte: case Zbyte:
v = vaddr(&p->from, &rel);
if(rel.siz != 0) {
rel.siz = op;
r = addrel(cursym);
*r = rel;
r->off = p->pc + andptr - and;
}
*andptr++ = v; *andptr++ = v;
if(op > 1) { if(op > 1) {
*andptr++ = v>>8; *andptr++ = v>>8;
...@@ -1595,6 +1791,7 @@ void ...@@ -1595,6 +1791,7 @@ void
asmins(Prog *p) asmins(Prog *p)
{ {
int n, np, c; int n, np, c;
Reloc *r;
rexflag = 0; rexflag = 0;
andptr = and; andptr = and;
...@@ -1604,7 +1801,7 @@ asmins(Prog *p) ...@@ -1604,7 +1801,7 @@ asmins(Prog *p)
/* /*
* as befits the whole approach of the architecture, * as befits the whole approach of the architecture,
* the rex prefix must appear before the first opcode byte * the rex prefix must appear before the first opcode byte
* (and thus after any 66/67/f2/f3 prefix bytes, but * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
* before the 0f opcode escape!), or it might be ignored. * before the 0f opcode escape!), or it might be ignored.
* note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
*/ */
...@@ -1616,6 +1813,11 @@ asmins(Prog *p) ...@@ -1616,6 +1813,11 @@ asmins(Prog *p)
if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26) if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
break; break;
} }
for(r=cursym->r+cursym->nr; r-- > cursym->r; ) {
if(r->off < p->pc)
break;
r->off++;
}
memmove(and+np+1, and+np, n-np); memmove(and+np+1, and+np, n-np);
and[np] = 0x40 | rexflag; and[np] = 0x40 | rexflag;
andptr++; andptr++;
......
...@@ -499,6 +499,7 @@ enum ...@@ -499,6 +499,7 @@ enum
D_CONST2 = D_INDIR+D_INDIR, D_CONST2 = D_INDIR+D_INDIR,
D_SIZE, /* 8l internal */ D_SIZE, /* 8l internal */
D_PCREL,
T_TYPE = 1<<0, T_TYPE = 1<<0,
T_INDEX = 1<<1, T_INDEX = 1<<1,
......
...@@ -156,6 +156,9 @@ relocsym(Sym *s) ...@@ -156,6 +156,9 @@ relocsym(Sym *s)
case D_ADDR: case D_ADDR:
o = symaddr(r->sym); o = symaddr(r->sym);
break; break;
case D_PCREL:
o = symaddr(r->sym) - (s->value + r->off + r->siz);
break;
case D_SIZE: case D_SIZE:
o = r->sym->size; o = r->sym->size;
break; break;
...@@ -190,11 +193,8 @@ reloc(void) ...@@ -190,11 +193,8 @@ reloc(void)
for(s=textp; s!=S; s=s->next) for(s=textp; s!=S; s=s->next)
relocsym(s); relocsym(s);
for(s=datap; s!=S; s=s->next) { for(s=datap; s!=S; s=s->next)
if(!s->reachable)
diag("unerachable? %s", s->name);
relocsym(s); relocsym(s);
}
} }
void void
...@@ -341,6 +341,70 @@ blk(Sym *allsym, int32 addr, int32 size) ...@@ -341,6 +341,70 @@ blk(Sym *allsym, int32 addr, int32 size)
cflush(); cflush();
} }
/*
 * codeblk hands the text symbols covering [addr, addr+size) to blk and,
 * when debug['a'] is set, also prints a listing of that range to bso:
 * padding before a symbol, one header line per symbol, then one line per
 * instruction with its bytes (taken from sym->p) and the %P form of the
 * instruction.
 *
 * addr is the starting address, size the length in bytes.
 *
 * Values printed with the ll-width verbs are cast to vlong so that the
 * argument width always matches the format: addr is int32 and p->pc is
 * declared vlong, so passing them uncast to %llux / %ux mismatches the
 * variadic argument size.
 */
void
codeblk(int32 addr, int32 size)
{
	Sym *sym;
	int32 eaddr, i, n, epc;
	Prog *p;
	uchar *q;

	if(debug['a'])
		Bprint(&bso, "codeblk [%#x,%#x) at offset %#llx\n", addr, addr+size, seek(cout, 0, 1));

	blk(textp, addr, size);

	/* again for printing */
	if(!debug['a'])
		return;

	/* skip to the first reachable symbol at or beyond addr */
	for(sym = textp; sym != nil; sym = sym->next) {
		if(!sym->reachable)
			continue;
		if(sym->value >= addr)
			break;
	}

	eaddr = addr + size;
	for(; sym != nil; sym = sym->next) {
		if(!sym->reachable)
			continue;
		if(sym->value >= eaddr)
			break;

		/* gap before this symbol: show it as zero bytes */
		if(addr < sym->value) {
			Bprint(&bso, "%-20s %.8llux|", "_", (vlong)addr);
			for(; addr < sym->value; addr++)
				Bprint(&bso, " %.2ux", 0);
			Bprint(&bso, "\n");
		}

		/* header line for the symbol, showing its first Prog */
		p = sym->text;
		Bprint(&bso, "%-20s %.8llux| %P\n", sym->name, (vlong)addr, p);

		/* one line per remaining instruction */
		for(p = p->link; p != P; p = p->link) {
			/* an instruction ends where the next begins, or at the symbol's end */
			if(p->link != P)
				epc = p->link->pc;
			else
				epc = sym->value + sym->size;
			Bprint(&bso, "%.6llux\t", (vlong)p->pc);
			q = sym->p + p->pc - sym->value;
			n = epc - p->pc;
			for(i=0; i<n; i++)
				Bprint(&bso, "%.2ux", *q++);
			/* pad the byte column for instructions shorter than 10 bytes */
			for(; i < 10; i++)
				Bprint(&bso, " ");
			Bprint(&bso, " | %P\n", p);
			addr += n;
		}
	}

	/* trailing gap after the last symbol in range */
	if(addr < eaddr) {
		Bprint(&bso, "%-20s %.8llux|", "_", (vlong)addr);
		for(; addr < eaddr; addr++)
			Bprint(&bso, " %.2ux", 0);
		/* end the line, as the pre-symbol padding branch does */
		Bprint(&bso, "\n");
	}
	Bflush(&bso);
}
void void
datblk(int32 addr, int32 size) datblk(int32 addr, int32 size)
{ {
...@@ -348,6 +412,9 @@ datblk(int32 addr, int32 size) ...@@ -348,6 +412,9 @@ datblk(int32 addr, int32 size)
int32 eaddr; int32 eaddr;
uchar *p, *ep; uchar *p, *ep;
if(debug['a'])
Bprint(&bso, "datblk [%#x,%#x) at offset %#llx\n", addr, addr+size, seek(cout, 0, 1));
blk(datap, addr, size); blk(datap, addr, size);
/* again for printing */ /* again for printing */
...@@ -363,10 +430,8 @@ datblk(int32 addr, int32 size) ...@@ -363,10 +430,8 @@ datblk(int32 addr, int32 size)
if(sym->value >= eaddr) if(sym->value >= eaddr)
break; break;
if(addr < sym->value) { if(addr < sym->value) {
Bprint(&bso, "%-20s %.8ux|", "(pre-pad)", addr); Bprint(&bso, "%-20s %.8ux| 00 ...\n", "(pre-pad)", addr);
for(; addr < sym->value; addr++) addr = sym->value;
Bprint(&bso, " %.2ux", 0);
Bprint(&bso, "\n");
} }
Bprint(&bso, "%-20s %.8ux|", sym->name, addr); Bprint(&bso, "%-20s %.8ux|", sym->name, addr);
p = sym->p; p = sym->p;
...@@ -379,11 +444,9 @@ datblk(int32 addr, int32 size) ...@@ -379,11 +444,9 @@ datblk(int32 addr, int32 size)
Bprint(&bso, "\n"); Bprint(&bso, "\n");
} }
if(addr < eaddr) { if(addr < eaddr)
Bprint(&bso, "%-20s %.8ux|", "(post-pad)", addr); Bprint(&bso, "%-20s %.8ux| 00 ...\n", "(post-pad)", addr);
for(; addr < eaddr; addr++) Bprint(&bso, "%-20s %.8ux|\n", "", eaddr);
Bprint(&bso, " %.2ux", 0);
}
} }
void void
......
...@@ -132,6 +132,7 @@ char* expandpkg(char*, char*); ...@@ -132,6 +132,7 @@ char* expandpkg(char*, char*);
void deadcode(void); void deadcode(void);
void ewrite(int, void*, int); void ewrite(int, void*, int);
Reloc* addrel(Sym*); Reloc* addrel(Sym*);
void codeblk(int32, int32);
void datblk(int32, int32); void datblk(int32, int32);
Sym* datsort(Sym*); Sym* datsort(Sym*);
void reloc(void); void reloc(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment