Commit 90093f06 authored by Russ Cox

liblink: introduce TLS register on 386 and amd64

When I did the original 386 ports on Linux and OS X, I chose to
define GS-relative expressions like 4(GS) as relative to the actual
thread-local storage base, which was usually GS but might not be
(it might be FS, or it might be a different constant offset from GS or FS).

The original scope was limited but since then the rewrites have
gotten out of control. Sometimes GS is rewritten, sometimes FS.
Some ports do other rewrites to enable shared libraries and
other linking. At no point in the code is it clear whether you are
looking at the real GS/FS or some synthesized thing that will be
rewritten. The code manipulating all these is duplicated in many
places.

The first step to fixing issue 7719 is to make the code intelligible
again.

This CL adds an explicit TLS pseudo-register to the 386 and amd64.
As a register, TLS refers to the thread-local storage base, and it
can only be loaded into another register:

        MOVQ TLS, AX

An offset from the thread-local storage base is written off(reg)(TLS*1).
Semantically it is off(reg), but the (TLS*1) annotation marks this as
indexing from the loaded TLS base. This emits a relocation so that
if the linker needs to adjust the offset, it can. For example:

        MOVQ TLS, AX
        MOVQ 8(AX)(TLS*1), CX // load m into CX

On systems that support direct access to the TLS memory, this
pair of instructions can be reduced to a direct TLS memory reference:

        MOVQ 8(TLS), CX // load m into CX

The 2-instruction and 1-instruction forms correspond roughly to
ELF TLS initial exec mode and ELF TLS local exec mode, respectively.

Liblink applies this rewrite on systems that support the 1-instruction form.
The decision is made using only the operating system (and probably
the -shared flag, eventually), not the link mode. If some link modes
on a particular operating system require the 2-instruction form,
then all builds for that operating system will use the 2-instruction
form, so that the link mode decision can be delayed to link time.

Obviously it is late to be making changes like this, but I despair
of correcting issue 7719 and issue 7164 without it. To make sure
I am not changing existing behavior, I built a "hello world" program
for every GOOS/GOARCH combination we have and then worked
to make sure that the rewrite generates exactly the same binaries,
byte for byte. There are a handful of TODOs in the code marking
kludges to get the byte-for-byte property, but at least now I can
explain exactly how each binary is handled.

The targets I tested this way are:

        darwin-386
        darwin-amd64
        dragonfly-386
        dragonfly-amd64
        freebsd-386
        freebsd-amd64
        freebsd-arm
        linux-386
        linux-amd64
        linux-arm
        nacl-386
        nacl-amd64p32
        netbsd-386
        netbsd-amd64
        openbsd-386
        openbsd-amd64
        plan9-386
        plan9-amd64
        solaris-amd64
        windows-386
        windows-amd64

There were four exceptions to the byte-for-byte goal:

windows-386 and windows-amd64 have a time stamp
at bytes 137 and 138 of the header.

darwin-386 and plan9-386 have five or six modified
bytes in the middle of the Go symbol table, caused by
editing comments in runtime/sys_{darwin,plan9}_386.s.

Fixes #7164.

LGTM=iant
R=iant, aram, minux.ma, dave
CC=golang-codereviews
https://golang.org/cl/87920043
parent aeb37527
...@@ -232,6 +232,8 @@ enum ...@@ -232,6 +232,8 @@ enum
R_CONST, R_CONST,
R_PCREL, R_PCREL,
R_TLS, R_TLS,
R_TLS_LE, // TLS local exec offset from TLS segment register
R_TLS_IE, // TLS initial exec offset from TLS base pointer
R_GOTOFF, R_GOTOFF,
R_PLT0, R_PLT0,
R_PLT1, R_PLT1,
...@@ -340,7 +342,6 @@ struct Link ...@@ -340,7 +342,6 @@ struct Link
char* thestring; // full name of architecture ("arm", "amd64", ..) char* thestring; // full name of architecture ("arm", "amd64", ..)
int32 goarm; // for arm only, GOARM setting int32 goarm; // for arm only, GOARM setting
int headtype; int headtype;
int linkmode;
LinkArch* arch; LinkArch* arch;
int32 (*ignore)(char*); // do not emit names satisfying this function int32 (*ignore)(char*); // do not emit names satisfying this function
......
...@@ -334,6 +334,7 @@ struct ...@@ -334,6 +334,7 @@ struct
"TR5", LBREG, D_TR+5, "TR5", LBREG, D_TR+5,
"TR6", LBREG, D_TR+6, "TR6", LBREG, D_TR+6,
"TR7", LBREG, D_TR+7, "TR7", LBREG, D_TR+7,
"TLS", LSREG, D_TLS,
"AAA", LTYPE0, AAAA, "AAA", LTYPE0, AAAA,
"AAD", LTYPE0, AAAD, "AAD", LTYPE0, AAAD,
......
...@@ -513,7 +513,7 @@ naddr(Node *n, Addr *a) ...@@ -513,7 +513,7 @@ naddr(Node *n, Addr *a)
break; break;
case OEXREG: case OEXREG:
a->type = D_INDIR + D_GS; a->type = D_INDIR + D_TLS;
a->offset = n->reg - 1; a->offset = n->reg - 1;
break; break;
......
...@@ -850,20 +850,18 @@ enum ...@@ -850,20 +850,18 @@ enum
D_DR = 95, D_DR = 95,
D_TR = 103, D_TR = 103,
D_NONE = 111, D_TLS = 111,
D_NONE = 112,
D_BRANCH = 112,
D_EXTERN = 113, D_BRANCH = 113,
D_STATIC = 114, D_EXTERN = 114,
D_AUTO = 115, D_STATIC = 115,
D_PARAM = 116, D_AUTO = 116,
D_CONST = 117, D_PARAM = 117,
D_FCONST = 118, D_CONST = 118,
D_SCONST = 119, D_FCONST = 119,
D_ADDR = 120, D_SCONST = 120,
D_ADDR = 121,
D_FILE,
D_FILE1,
D_INDIR, /* additive */ D_INDIR, /* additive */
......
...@@ -79,7 +79,6 @@ archinit(void) ...@@ -79,7 +79,6 @@ archinit(void)
case Hsolaris: case Hsolaris:
break; break;
} }
ctxt->linkmode = linkmode;
switch(HEADTYPE) { switch(HEADTYPE) {
default: default:
......
...@@ -241,6 +241,7 @@ struct ...@@ -241,6 +241,7 @@ struct
"ES", LSREG, D_ES, "ES", LSREG, D_ES,
"FS", LSREG, D_FS, "FS", LSREG, D_FS,
"GS", LSREG, D_GS, "GS", LSREG, D_GS,
"TLS", LSREG, D_TLS,
"GDTR", LBREG, D_GDTR, "GDTR", LBREG, D_GDTR,
"IDTR", LBREG, D_IDTR, "IDTR", LBREG, D_IDTR,
......
...@@ -464,7 +464,7 @@ naddr(Node *n, Addr *a) ...@@ -464,7 +464,7 @@ naddr(Node *n, Addr *a)
break; break;
case OEXREG: case OEXREG:
a->type = D_INDIR + D_GS; a->type = D_INDIR + D_TLS;
a->offset = n->reg - 1; a->offset = n->reg - 1;
break; break;
......
...@@ -636,21 +636,19 @@ enum ...@@ -636,21 +636,19 @@ enum
D_X5, D_X5,
D_X6, D_X6,
D_X7, D_X7,
D_NONE = 67, D_TLS = 67,
D_NONE = 68,
D_BRANCH = 68,
D_EXTERN = 69, D_BRANCH = 69,
D_STATIC = 70, D_EXTERN = 70,
D_AUTO = 71, D_STATIC = 71,
D_PARAM = 72, D_AUTO = 72,
D_CONST = 73, D_PARAM = 73,
D_FCONST = 74, D_CONST = 74,
D_SCONST = 75, D_FCONST = 75,
D_ADDR = 76, D_SCONST = 76,
D_ADDR = 77,
D_FILE,
D_FILE1,
D_INDIR, /* additive */ D_INDIR, /* additive */
......
...@@ -69,7 +69,6 @@ archinit(void) ...@@ -69,7 +69,6 @@ archinit(void)
case Hopenbsd: case Hopenbsd:
break; break;
} }
ctxt->linkmode = linkmode;
switch(HEADTYPE) { switch(HEADTYPE) {
default: default:
......
...@@ -127,99 +127,22 @@ static struct { ...@@ -127,99 +127,22 @@ static struct {
char *goos; char *goos;
char *hdr; char *hdr;
} zasmhdr[] = { } zasmhdr[] = {
{"386", "windows",
"#define get_tls(r) MOVL 0x14(FS), r\n"
"#define g(r) 0(r)\n"
"#define m(r) 4(r)\n"
},
{"386", "plan9",
"// Plan 9 does not have per-process segment descriptors with\n"
"// which to do thread-local storage. Instead, we will use a\n"
"// fixed offset from the per-process TOS struct address for\n"
"// the local storage. Since the process ID is contained in the\n"
"// TOS struct, we specify an offset for that here as well.\n"
"#define get_tls(r) MOVL _tos(SB), r \n"
"#define g(r) -8(r)\n"
"#define m(r) -4(r)\n"
"#define procid(r) 48(r)\n"
},
{"386", "linux",
"// On Linux systems, what we call 0(GS) and 4(GS) for g and m\n"
"// turn into %gs:-8 and %gs:-4 (using gcc syntax to denote\n"
"// what the machine sees as opposed to 8l input).\n"
"// 8l rewrites 0(GS) and 4(GS) into these.\n"
"//\n"
"// On Linux Xen, it is not allowed to use %gs:-8 and %gs:-4\n"
"// directly. Instead, we have to store %gs:0 into a temporary\n"
"// register and then use -8(%reg) and -4(%reg). This kind\n"
"// of addressing is correct even when not running Xen.\n"
"//\n"
"// 8l can rewrite MOVL 0(GS), CX into the appropriate pair\n"
"// of mov instructions, using CX as the intermediate register\n"
"// (safe because CX is about to be written to anyway).\n"
"// But 8l cannot handle other instructions, like storing into 0(GS),\n"
"// which is where these macros come into play.\n"
"// get_tls sets up the temporary and then g and r use it.\n"
"//\n"
"// Another wrinkle is that get_tls needs to read from %gs:0,\n"
"// but in 8l input it's called 8(GS), because 8l is going to\n"
"// subtract 8 from all the offsets, as described above.\n"
"//\n"
"// The final wrinkle is that when generating an ELF .o file for\n"
"// external linking mode, we need to be able to relocate the\n"
"// -8(r) and -4(r) instructions. Tag them with an extra (GS*1)\n"
"// that is ignored by the linker except for that identification.\n"
"#define get_tls(r) MOVL 8(GS), r\n"
"#define g(r) -8(r)(GS*1)\n"
"#define m(r) -4(r)(GS*1)\n"
},
{"386", "nacl",
// Same as Linux above.
"#define get_tls(r) MOVL 8(GS), r\n"
"#define g(r) -8(r)(GS*1)\n"
"#define m(r) -4(r)(GS*1)\n"
},
{"386", "", {"386", "",
"#define get_tls(r)\n" "#define get_tls(r) MOVL TLS, r\n"
"#define g(r) 0(GS)\n" "#define g(r) 0(r)(TLS*1)\n"
"#define m(r) 4(GS)\n" "#define m(r) 4(r)(TLS*1)\n"
},
{"amd64p32", "nacl",
"#define get_tls(r)\n"
"#define g(r) 0(GS)\n"
"#define m(r) 4(GS)\n"
},
{"amd64", "windows",
"#define get_tls(r) MOVQ 0x28(GS), r\n"
"#define g(r) 0(r)\n"
"#define m(r) 8(r)\n"
},
{"amd64", "plan9",
"#define get_tls(r)\n"
"#define g(r) 0(GS)\n"
"#define m(r) 8(GS)\n"
"#define procid(r) 16(GS)\n"
}, },
{"amd64", "solaris", {"amd64p32", "",
"#define get_tls(r) MOVQ 0(FS), r\n" "#define get_tls(r) MOVL TLS, r\n"
"#define g(r) -16(r)(FS*1)\n" "#define g(r) 0(r)(TLS*1)\n"
"#define m(r) -8(r)(FS*1)\n" "#define m(r) 4(r)(TLS*1)\n"
},
// The TLS accessors here are defined here to use initial exec model.
// If the linker is not outputting a shared library, it will reduce
// the TLS accessors to the local exec model, effectively removing
// get_tls().
{"amd64", "linux",
"#define get_tls(r) MOVQ runtime·tlsgm(SB), r\n"
"#define g(r) 0(r)(GS*1)\n"
"#define m(r) 8(r)(GS*1)\n"
}, },
{"amd64", "", {"amd64", "",
"#define get_tls(r)\n" "#define get_tls(r) MOVQ TLS, r\n"
"#define g(r) 0(GS)\n" "#define g(r) 0(r)(TLS*1)\n"
"#define m(r) 8(GS)\n" "#define m(r) 8(r)(TLS*1)\n"
}, },
{"arm", "", {"arm", "",
"#define LR R14\n" "#define LR R14\n"
}, },
......
...@@ -183,6 +183,17 @@ relocsym(LSym *s) ...@@ -183,6 +183,17 @@ relocsym(LSym *s)
if(thechar != '6') if(thechar != '6')
o = r->add; o = r->add;
break; break;
case R_TLS_LE:
o = ctxt->tlsoffset + r->add;
break;
case R_TLS_IE:
if(iself || ctxt->headtype == Hplan9)
o = ctxt->tlsoffset + r->add;
else if(ctxt->headtype == Hwindows)
o = r->add;
else
sysfatal("unexpected R_TLS_IE relocation for %s", headstr(ctxt->headtype));
break;
case R_ADDR: case R_ADDR:
if(linkmode == LinkExternal && r->sym->type != SCONST) { if(linkmode == LinkExternal && r->sym->type != SCONST) {
r->done = 0; r->done = 0;
...@@ -262,6 +273,10 @@ relocsym(LSym *s) ...@@ -262,6 +273,10 @@ relocsym(LSym *s)
default: default:
ctxt->cursym = s; ctxt->cursym = s;
diag("bad reloc size %#ux for %s", siz, r->sym->name); diag("bad reloc size %#ux for %s", siz, r->sym->name);
case 1:
// TODO(rsc): Remove.
s->p[off] = (int8)o;
break;
case 4: case 4:
if(r->type == R_PCREL) { if(r->type == R_PCREL) {
if(o != (int32)o) if(o != (int32)o)
...@@ -312,6 +327,8 @@ dynrelocsym(LSym *s) ...@@ -312,6 +327,8 @@ dynrelocsym(LSym *s)
return; return;
for(r=s->r; r<s->r+s->nr; r++) { for(r=s->r; r<s->r+s->nr; r++) {
targ = r->sym; targ = r->sym;
if(targ == nil)
continue;
if(!targ->reachable) if(!targ->reachable)
diag("internal inconsistency: dynamic symbol %s is not reachable.", targ->name); diag("internal inconsistency: dynamic symbol %s is not reachable.", targ->name);
if(r->sym->plt == -2 && r->sym->got != -2) { // make dynimport JMP table for PE object files. if(r->sym->plt == -2 && r->sym->got != -2) { // make dynimport JMP table for PE object files.
......
...@@ -143,7 +143,6 @@ main(int argc, char *argv[]) ...@@ -143,7 +143,6 @@ main(int argc, char *argv[])
headstring = headstr(HEADTYPE); headstring = headstr(HEADTYPE);
archinit(); archinit();
ctxt->linkmode = linkmode;
ctxt->debugfloat = debug['F']; ctxt->debugfloat = debug['F'];
if(debug['v']) if(debug['v'])
......
...@@ -114,6 +114,7 @@ enum ...@@ -114,6 +114,7 @@ enum
Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7, Yrl32, Yrl64, Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7, Yrl32, Yrl64,
Ymr, Ymm, Ymr, Ymm,
Yxr, Yxm, Yxr, Yxm,
Ytls,
Ymax, Ymax,
Zxxx = 0, Zxxx = 0,
...@@ -1871,7 +1872,7 @@ instinit(void) ...@@ -1871,7 +1872,7 @@ instinit(void)
} }
static int static int
prefixof(Addr *a) prefixof(Link *ctxt, Addr *a)
{ {
switch(a->type) { switch(a->type) {
case D_INDIR+D_CS: case D_INDIR+D_CS:
...@@ -1884,6 +1885,27 @@ prefixof(Addr *a) ...@@ -1884,6 +1885,27 @@ prefixof(Addr *a)
return 0x64; return 0x64;
case D_INDIR+D_GS: case D_INDIR+D_GS:
return 0x65; return 0x65;
case D_INDIR+D_TLS:
// NOTE: Systems listed here should be only systems that
// support direct TLS references like 8(TLS) implemented as
// direct references from FS or GS. Systems that require
// the initial-exec model, where you load the TLS base into
// a register and then index from that register, do not reach
// this code and should not be listed.
switch(ctxt->headtype) {
default:
sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
case Hdragonfly:
case Hfreebsd:
case Hlinux:
case Hnetbsd:
case Hopenbsd:
case Hplan9:
case Hsolaris:
return 0x64; // FS
case Hdarwin:
return 0x65; // GS
}
} }
switch(a->index) { switch(a->index) {
case D_CS: case D_CS:
...@@ -2033,6 +2055,7 @@ oclass(Link *ctxt, Addr *a) ...@@ -2033,6 +2055,7 @@ oclass(Link *ctxt, Addr *a)
case D_ES: return Yes; case D_ES: return Yes;
case D_FS: return Yfs; case D_FS: return Yfs;
case D_GS: return Ygs; case D_GS: return Ygs;
case D_TLS: return Ytls;
case D_GDTR: return Ygdtr; case D_GDTR: return Ygdtr;
case D_IDTR: return Yidtr; case D_IDTR: return Yidtr;
...@@ -2278,6 +2301,19 @@ vaddr(Link *ctxt, Addr *a, Reloc *r) ...@@ -2278,6 +2301,19 @@ vaddr(Link *ctxt, Addr *a, Reloc *r)
r->type = R_PCREL; r->type = R_PCREL;
} else } else
r->type = R_ADDR; r->type = R_ADDR;
break;
case D_INDIR+D_TLS:
if(r == nil) {
ctxt->diag("need reloc for %D", a);
sysfatal("reloc");
}
r->type = R_TLS_LE;
r->siz = 4;
r->off = -1; // caller must fill in
r->add = v;
v = 0;
break;
} }
return v; return v;
} }
...@@ -2294,7 +2330,7 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64) ...@@ -2294,7 +2330,7 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
v = a->offset; v = a->offset;
t = a->type; t = a->type;
rel.siz = 0; rel.siz = 0;
if(a->index != D_NONE && a->index != D_FS && a->index != D_GS) { if(a->index != D_NONE && a->index != D_TLS) {
if(t < D_INDIR) { if(t < D_INDIR) {
switch(t) { switch(t) {
default: default:
...@@ -2360,9 +2396,11 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64) ...@@ -2360,9 +2396,11 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
scale = 1; scale = 1;
} else } else
t -= D_INDIR; t -= D_INDIR;
if(t == D_TLS)
v = vaddr(ctxt, a, &rel);
ctxt->rexflag |= (regrex[t] & Rxb) | rex; ctxt->rexflag |= (regrex[t] & Rxb) | rex;
if(t == D_NONE || (D_CS <= t && t <= D_GS)) { if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
if((ctxt->flag_shared || ctxt->headtype == Hnacl) && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || ctxt->asmode != 64) { if((ctxt->flag_shared || ctxt->headtype == Hnacl) && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || ctxt->asmode != 64) {
*ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3); *ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
goto putrelv; goto putrelv;
...@@ -2389,17 +2427,38 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64) ...@@ -2389,17 +2427,38 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
goto putrelv; goto putrelv;
} }
if(t >= D_AX && t <= D_R15) { if(t >= D_AX && t <= D_R15) {
if(v == 0 && t != D_BP && t != D_R13) { // TODO: Remove Hwindows condition.
if(v == 0 && t != D_BP && t != D_R13 && (a->index != D_TLS || (ctxt->headtype == Hwindows && a->scale == 2))) {
*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3); *ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
return; return;
} }
if(v >= -128 && v < 128) { if(v >= -128 && v < 128 && (a->index != D_TLS || a->scale != 1)) {
ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3); ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
if(a->index == D_TLS) {
Reloc *r;
memset(&rel, 0, sizeof rel);
rel.type = R_TLS_IE;
rel.siz = 1;
rel.sym = nil;
rel.add = v;
r = addrel(ctxt->cursym);
*r = rel;
r->off = ctxt->curp->pc + ctxt->andptr + 1 - ctxt->and;
v = 0;
}
ctxt->andptr[1] = v; ctxt->andptr[1] = v;
ctxt->andptr += 2; ctxt->andptr += 2;
return; return;
} }
*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3); *ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
if(a->index == D_TLS) {
memset(&rel, 0, sizeof rel);
rel.type = R_TLS_IE;
rel.siz = 4;
rel.sym = nil;
rel.add = v;
v = 0;
}
goto putrelv; goto putrelv;
} }
goto bad; goto bad;
...@@ -2574,6 +2633,10 @@ static Movtab ymovtab[] = ...@@ -2574,6 +2633,10 @@ static Movtab ymovtab[] =
{ASHRQ, Ycol, Yml, 6, Pw,0xac,0xad,0}, {ASHRQ, Ycol, Yml, 6, Pw,0xac,0xad,0},
{ASHLW, Ycol, Yml, 6, Pe,0xa4,0xa5,0}, {ASHLW, Ycol, Yml, 6, Pe,0xa4,0xa5,0},
{ASHRW, Ycol, Yml, 6, Pe,0xac,0xad,0}, {ASHRW, Ycol, Yml, 6, Pe,0xac,0xad,0},
/* load TLS base */
{AMOVQ, Ytls, Yrl, 7, 0,0,0,0},
0 0
}; };
...@@ -2664,10 +2727,10 @@ doasm(Link *ctxt, Prog *p) ...@@ -2664,10 +2727,10 @@ doasm(Link *ctxt, Prog *p)
return; return;
} }
pre = prefixof(&p->from); pre = prefixof(ctxt, &p->from);
if(pre) if(pre)
*ctxt->andptr++ = pre; *ctxt->andptr++ = pre;
pre = prefixof(&p->to); pre = prefixof(ctxt, &p->to);
if(pre) if(pre)
*ctxt->andptr++ = pre; *ctxt->andptr++ = pre;
...@@ -3296,6 +3359,43 @@ mfound: ...@@ -3296,6 +3359,43 @@ mfound:
break; break;
} }
break; break;
case 7: /* mov tls, r */
// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
// where you load the TLS base register into a register and then index off that
// register to access the actual TLS variables. Systems that allow direct TLS access
// are handled in prefixof above and should not be listed here.
switch(ctxt->headtype) {
default:
sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
case Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
// TLS base is 0(FS).
pp.from = p->from;
pp.from.type = D_INDIR+D_NONE;
pp.from.offset = 0;
pp.from.index = D_NONE;
pp.from.scale = 0;
ctxt->rexflag |= Pw;
*ctxt->andptr++ = 0x64; // FS
*ctxt->andptr++ = 0x8B;
asmand(ctxt, &pp.from, &p->to);
break;
case Hwindows:
// Windows TLS base is always 0x28(GS).
pp.from = p->from;
pp.from.type = D_INDIR+D_GS;
pp.from.offset = 0x28;
pp.from.index = D_NONE;
pp.from.scale = 0;
ctxt->rexflag |= Pw;
*ctxt->andptr++ = 0x65; // GS
*ctxt->andptr++ = 0x8B;
asmand(ctxt, &pp.from, &p->to);
break;
}
break;
} }
} }
......
...@@ -78,6 +78,7 @@ enum ...@@ -78,6 +78,7 @@ enum
Ym, Ym,
Ybr, Ybr,
Ycol, Ycol,
Ytls,
Ycs, Yss, Yds, Yes, Yfs, Ygs, Ycs, Yss, Yds, Yes, Yfs, Ygs,
Ygdtr, Yidtr, Yldtr, Ymsw, Ytask, Ygdtr, Yidtr, Yldtr, Ymsw, Ytask,
...@@ -1441,7 +1442,7 @@ instinit(void) ...@@ -1441,7 +1442,7 @@ instinit(void)
} }
static int static int
prefixof(Addr *a) prefixof(Link *ctxt, Addr *a)
{ {
switch(a->type) { switch(a->type) {
case D_INDIR+D_CS: case D_INDIR+D_CS:
...@@ -1454,6 +1455,23 @@ prefixof(Addr *a) ...@@ -1454,6 +1455,23 @@ prefixof(Addr *a)
return 0x64; return 0x64;
case D_INDIR+D_GS: case D_INDIR+D_GS:
return 0x65; return 0x65;
case D_INDIR+D_TLS:
// NOTE: Systems listed here should be only systems that
// support direct TLS references like 8(TLS) implemented as
// direct references from FS or GS. Systems that require
// the initial-exec model, where you load the TLS base into
// a register and then index from that register, do not reach
// this code and should not be listed.
switch(ctxt->headtype) {
default:
sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
case Hdarwin:
case Hdragonfly:
case Hfreebsd:
case Hnetbsd:
case Hopenbsd:
return 0x65; // GS
}
} }
return 0; return 0;
} }
...@@ -1543,6 +1561,7 @@ oclass(Addr *a) ...@@ -1543,6 +1561,7 @@ oclass(Addr *a)
case D_ES: return Yes; case D_ES: return Yes;
case D_FS: return Yfs; case D_FS: return Yfs;
case D_GS: return Ygs; case D_GS: return Ygs;
case D_TLS: return Ytls;
case D_GDTR: return Ygdtr; case D_GDTR: return Ygdtr;
case D_IDTR: return Yidtr; case D_IDTR: return Yidtr;
...@@ -1724,6 +1743,19 @@ vaddr(Link *ctxt, Addr *a, Reloc *r) ...@@ -1724,6 +1743,19 @@ vaddr(Link *ctxt, Addr *a, Reloc *r)
r->add = v; r->add = v;
v = 0; v = 0;
} }
break;
case D_INDIR+D_TLS:
if(r == nil) {
ctxt->diag("need reloc for %D", a);
sysfatal("bad code");
}
r->type = R_TLS_LE;
r->siz = 4;
r->off = -1; // caller must fill in
r->add = v;
v = 0;
break;
} }
return v; return v;
} }
...@@ -1738,7 +1770,7 @@ asmand(Link *ctxt, Addr *a, int r) ...@@ -1738,7 +1770,7 @@ asmand(Link *ctxt, Addr *a, int r)
v = a->offset; v = a->offset;
t = a->type; t = a->type;
rel.siz = 0; rel.siz = 0;
if(a->index != D_NONE && a->index != D_FS && a->index != D_GS) { if(a->index != D_NONE && a->index != D_TLS) {
if(t < D_INDIR || t >= 2*D_INDIR) { if(t < D_INDIR || t >= 2*D_INDIR) {
switch(t) { switch(t) {
default: default:
...@@ -1801,8 +1833,10 @@ asmand(Link *ctxt, Addr *a, int r) ...@@ -1801,8 +1833,10 @@ asmand(Link *ctxt, Addr *a, int r)
scale = 1; scale = 1;
} else } else
t -= D_INDIR; t -= D_INDIR;
if(t == D_TLS)
v = vaddr(ctxt, a, &rel);
if(t == D_NONE || (D_CS <= t && t <= D_GS)) { if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
*ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3); *ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
goto putrelv; goto putrelv;
} }
...@@ -1823,17 +1857,43 @@ asmand(Link *ctxt, Addr *a, int r) ...@@ -1823,17 +1857,43 @@ asmand(Link *ctxt, Addr *a, int r)
goto putrelv; goto putrelv;
} }
if(t >= D_AX && t <= D_DI) { if(t >= D_AX && t <= D_DI) {
if(v == 0 && rel.siz == 0 && t != D_BP) { // TODO(rsc): Remove the Hwindows test.
// As written it produces the same byte-identical output as the code it replaced.
if(v == 0 && rel.siz == 0 && t != D_BP && (a->index != D_TLS || ctxt->headtype == Hwindows)) {
*ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3); *ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
return; return;
} }
if(v >= -128 && v < 128 && rel.siz == 0 && a->index != D_FS && a->index != D_GS) { // TODO(rsc): Change a->index tests to check D_TLS.
// Then remove the if statement inside the body.
// As written the code is clearly incorrect for external linking,
// but as written it produces the same byte-identical output as the code it replaced.
if(v >= -128 && v < 128 && rel.siz == 0 && (a->index != D_TLS || ctxt->headtype == Hwindows || a->scale != 1)) {
ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3); ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
if(a->index == D_TLS) {
Reloc *r;
memset(&rel, 0, sizeof rel);
rel.type = R_TLS_IE;
rel.siz = 1;
rel.sym = nil;
rel.add = v;
r = addrel(ctxt->cursym);
*r = rel;
r->off = ctxt->curp->pc + ctxt->andptr + 1 - ctxt->and;
v = 0;
}
ctxt->andptr[1] = v; ctxt->andptr[1] = v;
ctxt->andptr += 2; ctxt->andptr += 2;
return; return;
} }
*ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3); *ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
if(a->index == D_TLS) {
memset(&rel, 0, sizeof rel);
rel.type = R_TLS_IE;
rel.siz = 4;
rel.sym = nil;
rel.add = v;
v = 0;
}
goto putrelv; goto putrelv;
} }
goto bad; goto bad;
...@@ -1961,6 +2021,10 @@ static uchar ymovtab[] = ...@@ -1961,6 +2021,10 @@ static uchar ymovtab[] =
/* extra imul */ /* extra imul */
AIMULW, Yml, Yrl, 7, Pq,0xaf,0,0, AIMULW, Yml, Yrl, 7, Pq,0xaf,0,0,
AIMULL, Yml, Yrl, 7, Pm,0xaf,0,0, AIMULL, Yml, Yrl, 7, Pm,0xaf,0,0,
/* load TLS base pointer */
AMOVL, Ytls, Yrl, 8, 0,0,0,0,
0 0
}; };
...@@ -2108,10 +2172,10 @@ doasm(Link *ctxt, Prog *p) ...@@ -2108,10 +2172,10 @@ doasm(Link *ctxt, Prog *p)
ctxt->curp = p; // TODO ctxt->curp = p; // TODO
pre = prefixof(&p->from); pre = prefixof(ctxt, &p->from);
if(pre) if(pre)
*ctxt->andptr++ = pre; *ctxt->andptr++ = pre;
pre = prefixof(&p->to); pre = prefixof(ctxt, &p->to);
if(pre) if(pre)
*ctxt->andptr++ = pre; *ctxt->andptr++ = pre;
...@@ -2628,6 +2692,54 @@ mfound: ...@@ -2628,6 +2692,54 @@ mfound:
*ctxt->andptr++ = t[5]; *ctxt->andptr++ = t[5];
asmand(ctxt, &p->from, reg[p->to.type]); asmand(ctxt, &p->from, reg[p->to.type]);
break; break;
case 8: /* mov tls, r */
// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
// where you load the TLS base register into a register and then index off that
// register to access the actual TLS variables. Systems that allow direct TLS access
// are handled in prefixof above and should not be listed here.
switch(ctxt->headtype) {
default:
sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
case Hlinux:
case Hnacl:
// ELF TLS base is 0(GS).
pp.from = p->from;
pp.from.type = D_INDIR+D_GS;
pp.from.offset = 0;
pp.from.index = D_NONE;
pp.from.scale = 0;
*ctxt->andptr++ = 0x65; // GS
*ctxt->andptr++ = 0x8B;
asmand(ctxt, &pp.from, reg[p->to.type]);
break;
case Hplan9:
if(ctxt->plan9tos == nil)
ctxt->plan9tos = linklookup(ctxt, "_tos", 0);
memset(&pp.from, 0, sizeof pp.from);
pp.from.type = D_EXTERN;
pp.from.sym = ctxt->plan9tos;
pp.from.offset = 0;
pp.from.index = D_NONE;
*ctxt->andptr++ = 0x8B;
asmand(ctxt, &pp.from, reg[p->to.type]);
break;
case Hwindows:
// Windows TLS base is always 0x14(FS).
pp.from = p->from;
pp.from.type = D_INDIR+D_FS;
pp.from.offset = 0x14;
pp.from.index = D_NONE;
pp.from.scale = 0;
*ctxt->andptr++ = 0x64; // FS
*ctxt->andptr++ = 0x8B;
asmand(ctxt, &pp.from, reg[p->to.type]);
break;
}
break;
} }
} }
......
...@@ -341,6 +341,7 @@ char* regstr[] = ...@@ -341,6 +341,7 @@ char* regstr[] =
"TR6", "TR6",
"TR7", "TR7",
"TLS", /* [D_TLS] */
"NONE", /* [D_NONE] */ "NONE", /* [D_NONE] */
}; };
......
...@@ -289,6 +289,7 @@ char* regstr[] = ...@@ -289,6 +289,7 @@ char* regstr[] =
"X6", "X6",
"X7", "X7",
"TLS", /* [D_TLS] */
"NONE", /* [D_NONE] */ "NONE", /* [D_NONE] */
}; };
......
...@@ -99,6 +99,17 @@ settextflag(Prog *p, int f) ...@@ -99,6 +99,17 @@ settextflag(Prog *p, int f)
static void nacladdr(Link*, Prog*, Addr*); static void nacladdr(Link*, Prog*, Addr*);
// canuselocaltls reports whether progedit may reduce the two-instruction
// TLS initial exec sequence
//	MOVQ TLS, BX
//	... off(BX)(TLS*1) ...
// to the one-instruction TLS local exec form
//	... off(TLS) ...
// for the operating system selected by ctxt->headtype.
// It returns 0 only for Hwindows and 1 for every other header type.
// Hlinux is intentionally not refused here: its entry exists only as a
// disabled (commented-out) case.
static int
canuselocaltls(Link *ctxt)
{
	if(ctxt->headtype == Hwindows)
		return 0;
	return 1;
}
static void static void
progedit(Link *ctxt, Prog *p) progedit(Link *ctxt, Prog *p)
{ {
...@@ -106,105 +117,98 @@ progedit(Link *ctxt, Prog *p) ...@@ -106,105 +117,98 @@ progedit(Link *ctxt, Prog *p)
LSym *s; LSym *s;
Prog *q; Prog *q;
if(ctxt->headtype == Hnacl) { // Thread-local storage references use the TLS pseudo-register.
nacladdr(ctxt, p, &p->from); // As a register, TLS refers to the thread-local storage base, and it
nacladdr(ctxt, p, &p->to); // can only be loaded into another register:
} //
// MOVQ TLS, AX
if(p->from.type == D_INDIR+D_GS || p->from.index == D_GS) //
p->from.offset += ctxt->tlsoffset; // An offset from the thread-local storage base is written off(reg)(TLS*1).
if(p->to.type == D_INDIR+D_GS || p->to.index == D_GS) // Semantically it is off(reg), but the (TLS*1) annotation marks this as
p->to.offset += ctxt->tlsoffset; // indexing from the loaded TLS base. This emits a relocation so that
// if the linker needs to adjust the offset, it can. For example:
if(ctxt->gmsym == nil) //
ctxt->gmsym = linklookup(ctxt, "runtime.tlsgm", 0); // MOVQ TLS, AX
// MOVQ 8(AX)(TLS*1), CX // load m into CX
if(ctxt->headtype == Hwindows) { //
// Windows // On systems that support direct access to the TLS memory, this
// Convert // pair of instructions can be reduced to a direct TLS memory reference:
// op n(GS), reg //
// to // MOVQ 8(TLS), CX // load m into CX
// MOVL 0x28(GS), reg //
// op n(reg), reg // The 2-instruction and 1-instruction forms correspond roughly to
// The purpose of this patch is to fix some accesses // ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
// to extern register variables (TLS) on Windows, as //
// a different method is used to access them. // We apply this rewrite on systems that support the 1-instruction form.
if(p->from.type == D_INDIR+D_GS // The decision is made using only the operating system (and probably
&& p->to.type >= D_AX && p->to.type <= D_DI // the -shared flag, eventually), not the link mode. If some link modes
&& p->from.offset <= 8) { // on a particular operating system require the 2-instruction form,
q = appendp(ctxt, p); // then all builds for that operating system will use the 2-instruction
q->from = p->from; // form, so that the link mode decision can be delayed to link time.
q->from.type = D_INDIR + p->to.type; //
q->to = p->to; // In this way, all supported systems use identical instructions to
q->as = p->as; // access TLS, and they are rewritten appropriately first here in
p->as = AMOVQ; // liblink and then finally using relocations in the linker.
p->from.type = D_INDIR+D_GS;
p->from.offset = 0x28; if(canuselocaltls(ctxt)) {
} // Reduce TLS initial exec model to TLS local exec model.
} // Sequences like
if(ctxt->headtype == Hlinux || ctxt->headtype == Hfreebsd // MOVQ TLS, BX
|| ctxt->headtype == Hopenbsd || ctxt->headtype == Hnetbsd // ... off(BX)(TLS*1) ...
|| ctxt->headtype == Hplan9 || ctxt->headtype == Hdragonfly // become
|| ctxt->headtype == Hsolaris) { // NOP
// ELF uses FS instead of GS. // ... off(TLS) ...
if(p->from.type == D_INDIR+D_GS) //
p->from.type = D_INDIR+D_FS; // TODO(rsc): Remove the Hsolaris special case. It exists only to
if(p->to.type == D_INDIR+D_GS) // guarantee we are producing byte-identical binaries as before this code.
p->to.type = D_INDIR+D_FS; // But it should be unnecessary.
if(p->from.index == D_GS) if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris)
p->from.index = D_FS; nopout(p);
if(p->to.index == D_GS) if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) {
p->to.index = D_FS; p->from.type = D_INDIR+D_TLS;
} p->from.scale = 0;
if(!ctxt->flag_shared) {
// Convert g() or m() accesses of the form
// op n(reg)(GS*1), reg
// to
// op n(GS*1), reg
if(p->from.index == D_FS || p->from.index == D_GS) {
p->from.type = D_INDIR + p->from.index;
p->from.index = D_NONE; p->from.index = D_NONE;
} }
// Convert g() or m() accesses of the form if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) {
// op reg, n(reg)(GS*1) p->to.type = D_INDIR+D_TLS;
// to p->to.scale = 0;
// op reg, n(GS*1)
if(p->to.index == D_FS || p->to.index == D_GS) {
p->to.type = D_INDIR + p->to.index;
p->to.index = D_NONE; p->to.index = D_NONE;
} }
// Convert get_tls access of the form
// op runtime.tlsgm(SB), reg
// to
// NOP
if(ctxt->gmsym != nil && p->from.sym == ctxt->gmsym) {
p->as = ANOP;
p->from.type = D_NONE;
p->to.type = D_NONE;
p->from.sym = nil;
p->to.sym = nil;
}
} else { } else {
// Convert TLS reads of the form // As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
// op n(GS), reg // The instruction
// to // MOVQ off(TLS), BX
// MOVQ $runtime.tlsgm(SB), reg // becomes the sequence
// op n(reg)(GS*1), reg // MOVQ TLS, BX
if((p->from.type == D_INDIR+D_FS || p->from.type == D_INDIR + D_GS) && p->to.type >= D_AX && p->to.type <= D_DI) { // MOVQ off(BX)(TLS*1), BX
// This allows the C compilers to emit references to m and g using the direct off(TLS) form.
if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) {
q = appendp(ctxt, p); q = appendp(ctxt, p);
q->to = p->to;
q->as = p->as; q->as = p->as;
q->from.type = D_INDIR+p->to.type; q->from = p->from;
q->from.index = p->from.type - D_INDIR; q->from.type = D_INDIR + p->to.type;
q->from.scale = 1; q->from.index = D_TLS;
q->from.offset = p->from.offset; q->from.scale = 2; // TODO: use 1
p->as = AMOVQ; q->to = p->to;
p->from.type = D_EXTERN; p->from.type = D_TLS;
p->from.sym = ctxt->gmsym; p->from.index = D_NONE;
p->from.offset = 0; p->from.offset = 0;
} }
} }
// TODO: Remove.
if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) {
if(p->from.scale == 1 && p->from.index == D_TLS)
p->from.scale = 2;
if(p->to.scale == 1 && p->to.index == D_TLS)
p->to.scale = 2;
}
if(ctxt->headtype == Hnacl) {
nacladdr(ctxt, p, &p->from);
nacladdr(ctxt, p, &p->to);
}
// Maintain information about code generation mode. // Maintain information about code generation mode.
if(ctxt->mode == 0) if(ctxt->mode == 0)
ctxt->mode = 64; ctxt->mode = 64;
...@@ -315,9 +319,9 @@ nacladdr(Link *ctxt, Prog *p, Addr *a) ...@@ -315,9 +319,9 @@ nacladdr(Link *ctxt, Prog *p, Addr *a)
ctxt->diag("invalid address: %P", p); ctxt->diag("invalid address: %P", p);
return; return;
} }
if(a->type == D_INDIR+D_GS) if(a->type == D_INDIR+D_TLS)
a->type = D_INDIR+D_BP; a->type = D_INDIR+D_BP;
else if(a->type == D_GS) else if(a->type == D_TLS)
a->type = D_BP; a->type = D_BP;
if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) { if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) {
switch(a->type) { switch(a->type) {
...@@ -632,48 +636,24 @@ indir_cx(Link *ctxt, Addr *a) ...@@ -632,48 +636,24 @@ indir_cx(Link *ctxt, Addr *a)
// Returns last new instruction. // Returns last new instruction.
static Prog* static Prog*
load_g_cx(Link *ctxt, Prog *p) load_g_cx(Link *ctxt, Prog *p)
{ {
if(ctxt->flag_shared) { Prog *next;
// Load TLS offset with MOVQ $runtime.tlsgm(SB), CX
p->as = AMOVQ;
p->from.type = D_EXTERN;
p->from.sym = ctxt->gmsym;
p->to.type = D_CX;
p = appendp(ctxt, p);
}
p->as = AMOVQ; p->as = AMOVQ;
if(ctxt->headtype == Hlinux || ctxt->headtype == Hfreebsd if(ctxt->arch->ptrsize == 4)
|| ctxt->headtype == Hopenbsd || ctxt->headtype == Hnetbsd
|| ctxt->headtype == Hplan9 || ctxt->headtype == Hdragonfly
|| ctxt->headtype == Hsolaris)
// ELF uses FS
p->from.type = D_INDIR+D_FS;
else if(ctxt->headtype == Hnacl) {
p->as = AMOVL; p->as = AMOVL;
p->from.type = D_INDIR+D_BP; p->from.type = D_INDIR+D_TLS;
} else p->from.offset = 0;
p->from.type = D_INDIR+D_GS;
if(ctxt->flag_shared) {
// Add TLS offset stored in CX
p->from.index = p->from.type - D_INDIR;
indir_cx(ctxt, &p->from);
}
p->from.offset = ctxt->tlsoffset+0;
p->to.type = D_CX; p->to.type = D_CX;
if(ctxt->headtype == Hwindows) {
// movq %gs:0x28, %rcx next = p->link;
// movq (%rcx), %rcx progedit(ctxt, p);
p->as = AMOVQ; while(p->link != next)
p->from.type = D_INDIR+D_GS; p = p->link;
p->from.offset = 0x28;
p->to.type = D_CX; if(p->from.index == D_TLS)
p->from.scale = 2;
p = appendp(ctxt, p);
p->as = AMOVQ;
indir_cx(ctxt, &p->from);
p->from.offset = 0;
p->to.type = D_CX;
}
return p; return p;
} }
......
...@@ -91,80 +91,80 @@ settextflag(Prog *p, int f) ...@@ -91,80 +91,80 @@ settextflag(Prog *p, int f)
p->from.scale = f; p->from.scale = f;
} }
// canuselocaltls reports whether code for the target operating system may
// use the direct off(TLS) form of thread-local access (the "local exec"
// style). It returns 0 for the Linux, NaCl, Plan 9 and Windows header
// types and 1 for every other header type. progedit consults the result
// to choose between collapsing the two-instruction TLS sequence into one
// instruction and expanding a direct off(TLS) load into two.
static int
canuselocaltls(Link *ctxt)
{
	if(ctxt->headtype == Hlinux || ctxt->headtype == Hnacl)
		return 0;
	if(ctxt->headtype == Hplan9 || ctxt->headtype == Hwindows)
		return 0;
	return 1;
}
static void static void
progedit(Link *ctxt, Prog *p) progedit(Link *ctxt, Prog *p)
{ {
Prog *q;
char literal[64]; char literal[64];
LSym *s; LSym *s;
Prog *q;
if(p->from.type == D_INDIR+D_GS)
p->from.offset += ctxt->tlsoffset; // See obj6.c for discussion of TLS.
if(p->to.type == D_INDIR+D_GS) if(canuselocaltls(ctxt)) {
p->to.offset += ctxt->tlsoffset; // Reduce TLS initial exec model to TLS local exec model.
// Sequences like
if(ctxt->headtype == Hwindows) { // MOVL TLS, BX
// Convert // ... off(BX)(TLS*1) ...
// op n(GS), reg // become
// to // NOP
// MOVL 0x14(FS), reg // ... off(TLS) ...
// op n(reg), reg if(p->as == AMOVL && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_DI) {
// The purpose of this patch is to fix some accesses p->as = ANOP;
// to extern register variables (TLS) on Windows, as p->from.type = D_NONE;
// a different method is used to access them. p->to.type = D_NONE;
if(p->from.type == D_INDIR+D_GS
&& p->to.type >= D_AX && p->to.type <= D_DI) {
q = appendp(ctxt, p);
q->from = p->from;
q->from.type = D_INDIR + p->to.type;
q->to = p->to;
q->as = p->as;
p->as = AMOVL;
p->from.type = D_INDIR+D_FS;
p->from.offset = 0x14;
} }
} if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_DI) {
if(ctxt->headtype == Hlinux || ctxt->headtype == Hnacl) { p->from.type = D_INDIR+D_TLS;
// Running binaries under Xen requires using p->from.scale = 0;
// MOVL 0(GS), reg p->from.index = D_NONE;
// and then off(reg) instead of saying off(GS) directly
// when the offset is negative.
// In external mode we just produce a reloc.
if(p->from.type == D_INDIR+D_GS && p->from.offset < 0
&& p->to.type >= D_AX && p->to.type <= D_DI) {
if(ctxt->linkmode != LinkExternal) {
q = appendp(ctxt, p);
q->from = p->from;
q->from.type = D_INDIR + p->to.type;
q->to = p->to;
q->as = p->as;
p->as = AMOVL;
p->from.type = D_INDIR+D_GS;
p->from.offset = 0;
} else {
// Add signals to relocate.
p->from.index = D_GS;
p->from.scale = 1;
}
} }
} if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_DI) {
if(ctxt->headtype == Hplan9) { p->to.type = D_INDIR+D_TLS;
if(p->from.type == D_INDIR+D_GS p->to.scale = 0;
&& p->to.type >= D_AX && p->to.type <= D_DI) { p->to.index = D_NONE;
if(ctxt->plan9tos == nil) }
ctxt->plan9tos = linklookup(ctxt, "_tos", 0); } else {
// As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
// The instruction
// MOVL off(TLS), BX
// becomes the sequence
// MOVL TLS, BX
// MOVL off(BX)(TLS*1), BX
// This allows the C compilers to emit references to m and g using the direct off(TLS) form.
if(p->as == AMOVL && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_DI) {
q = appendp(ctxt, p); q = appendp(ctxt, p);
q->as = p->as;
q->from = p->from; q->from = p->from;
q->from.type = D_INDIR + p->to.type; q->from.type = D_INDIR + p->to.type;
q->from.index = D_TLS;
q->from.scale = 2; // TODO: use 1
q->to = p->to; q->to = p->to;
q->as = p->as; p->from.type = D_TLS;
p->as = AMOVL; p->from.index = D_NONE;
p->from.type = D_EXTERN;
p->from.sym = ctxt->plan9tos;
p->from.offset = 0; p->from.offset = 0;
} }
} }
// TODO: Remove.
if(ctxt->headtype == Hplan9) {
if(p->from.scale == 1 && p->from.index == D_TLS)
p->from.scale = 2;
if(p->to.scale == 1 && p->to.index == D_TLS)
p->to.scale = 2;
}
// Rewrite CALL/JMP/RET to symbol as D_BRANCH. // Rewrite CALL/JMP/RET to symbol as D_BRANCH.
switch(p->as) { switch(p->as) {
case ACALL: case ACALL:
...@@ -435,62 +435,21 @@ addstacksplit(Link *ctxt, LSym *cursym) ...@@ -435,62 +435,21 @@ addstacksplit(Link *ctxt, LSym *cursym)
static Prog* static Prog*
load_g_cx(Link *ctxt, Prog *p) load_g_cx(Link *ctxt, Prog *p)
{ {
switch(ctxt->headtype) { Prog *next;
case Hwindows:
p->as = AMOVL;
p->from.type = D_INDIR+D_FS;
p->from.offset = 0x14;
p->to.type = D_CX;
p = appendp(ctxt, p); p->as = AMOVL;
p->as = AMOVL; p->from.type = D_INDIR+D_TLS;
p->from.type = D_INDIR+D_CX; p->from.offset = 0;
p->from.offset = 0; p->to.type = D_CX;
p->to.type = D_CX;
break; next = p->link;
progedit(ctxt, p);
while(p->link != next)
p = p->link;
case Hlinux: if(p->from.index == D_TLS)
case Hnacl: p->from.scale = 2;
if(ctxt->linkmode != LinkExternal) {
p->as = AMOVL;
p->from.type = D_INDIR+D_GS;
p->from.offset = 0;
p->to.type = D_CX;
p = appendp(ctxt, p);
p->as = AMOVL;
p->from.type = D_INDIR+D_CX;
p->from.offset = ctxt->tlsoffset + 0;
p->to.type = D_CX;
} else {
p->as = AMOVL;
p->from.type = D_INDIR+D_GS;
p->from.offset = ctxt->tlsoffset + 0;
p->to.type = D_CX;
p->from.index = D_GS;
p->from.scale = 1;
}
break;
case Hplan9:
p->as = AMOVL;
p->from.type = D_EXTERN;
p->from.sym = ctxt->plan9tos;
p->to.type = D_CX;
p = appendp(ctxt, p);
p->as = AMOVL;
p->from.type = D_INDIR+D_CX;
p->from.offset = ctxt->tlsoffset + 0;
p->to.type = D_CX;
break;
default:
p->as = AMOVL;
p->from.type = D_INDIR+D_GS;
p->from.offset = ctxt->tlsoffset + 0;
p->to.type = D_CX;
}
return p; return p;
} }
......
...@@ -274,6 +274,7 @@ writesym(Link *ctxt, Biobuf *b, LSym *s) ...@@ -274,6 +274,7 @@ writesym(Link *ctxt, Biobuf *b, LSym *s)
Pcln *pc; Pcln *pc;
Prog *p; Prog *p;
Auto *a; Auto *a;
char *name;
if(ctxt->debugasm) { if(ctxt->debugasm) {
Bprint(ctxt->bso, "%s ", s->name); Bprint(ctxt->bso, "%s ", s->name);
...@@ -308,7 +309,10 @@ writesym(Link *ctxt, Biobuf *b, LSym *s) ...@@ -308,7 +309,10 @@ writesym(Link *ctxt, Biobuf *b, LSym *s)
} }
for(i=0; i<s->nr; i++) { for(i=0; i<s->nr; i++) {
r = &s->r[i]; r = &s->r[i];
Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, r->sym->name, (vlong)r->add); name = "";
if(r->sym != nil)
name = r->sym->name;
Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, name, (vlong)r->add);
} }
} }
......
...@@ -118,6 +118,7 @@ linknew(LinkArch *arch) ...@@ -118,6 +118,7 @@ linknew(LinkArch *arch)
sysfatal("unknown goos %s", getgoos()); sysfatal("unknown goos %s", getgoos());
// Record thread-local storage offset. // Record thread-local storage offset.
// TODO(rsc): Move tlsoffset back into the linker.
switch(ctxt->headtype) { switch(ctxt->headtype) {
default: default:
sysfatal("unknown thread-local storage offset for %s", headstr(ctxt->headtype)); sysfatal("unknown thread-local storage offset for %s", headstr(ctxt->headtype));
......
...@@ -99,10 +99,10 @@ typedef struct DebugVars DebugVars; ...@@ -99,10 +99,10 @@ typedef struct DebugVars DebugVars;
* *
* "extern register" is a special storage class implemented by 6c, 8c, etc. * "extern register" is a special storage class implemented by 6c, 8c, etc.
* On the ARM, it is an actual register; elsewhere it is a slot in thread- * On the ARM, it is an actual register; elsewhere it is a slot in thread-
* local storage indexed by a segment register. See zasmhdr in * local storage indexed by a pseudo-register TLS. See zasmhdr in
* src/cmd/dist/buildruntime.c for details, and be aware that the linker may * src/cmd/dist/buildruntime.c for details, and be aware that the linker may
* make further OS-specific changes to the compiler's output. For example, * make further OS-specific changes to the compiler's output. For example,
* 6l/linux rewrites 0(GS) as -16(FS). * 6l/linux rewrites 0(TLS) as -16(FS).
* *
* Every C file linked into a Go program must include runtime.h so that the * Every C file linked into a Go program must include runtime.h so that the
* C compiler (6c, 8c, etc.) knows to avoid other uses of these dedicated * C compiler (6c, 8c, etc.) knows to avoid other uses of these dedicated
......
...@@ -457,8 +457,7 @@ TEXT runtime·setldt(SB),NOSPLIT,$32 ...@@ -457,8 +457,7 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
* we use its pthread_create and let it set up %gs * we use its pthread_create and let it set up %gs
* for us. When we do that, the private storage * for us. When we do that, the private storage
* we get is not at 0(GS) but at 0x468(GS). * we get is not at 0(GS) but at 0x468(GS).
* To insulate the rest of the tool chain from this ugliness, * 8l rewrites 0(TLS) into 0x468(GS) for us.
* 8l rewrites 0(GS) into 0x468(GS) for us.
* To accommodate that rewrite, we translate the * To accommodate that rewrite, we translate the
* address and limit here so that 0x468(GS) maps to 0(address). * address and limit here so that 0x468(GS) maps to 0(address).
* *
......
...@@ -383,7 +383,7 @@ TEXT runtime·setldt(SB),NOSPLIT,$32 ...@@ -383,7 +383,7 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
* for us. When we do that, the private storage * for us. When we do that, the private storage
* we get is not at 0(GS), 4(GS), but -8(GS), -4(GS). * we get is not at 0(GS), 4(GS), but -8(GS), -4(GS).
* To insulate the rest of the tool chain from this * To insulate the rest of the tool chain from this
* ugliness, 8l rewrites 0(GS) into -8(GS) for us. * ugliness, 8l rewrites 0(TLS) into -8(GS) for us.
* To accommodate that rewrite, we translate * To accommodate that rewrite, we translate
* the address here and bump the limit to 0xffffffff (no limit) * the address here and bump the limit to 0xffffffff (no limit)
* so that -8(GS) maps to 0(address). * so that -8(GS) maps to 0(address).
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
MOVL $(0x10000 + ((code)<<5)), AX; JMP AX MOVL $(0x10000 + ((code)<<5)), AX; JMP AX
TEXT runtime·settls(SB),NOSPLIT,$0 TEXT runtime·settls(SB),NOSPLIT,$0
MOVL DI, GS // really BP MOVL DI, TLS // really BP
RET RET
TEXT runtime·exit(SB),NOSPLIT,$0 TEXT runtime·exit(SB),NOSPLIT,$0
...@@ -173,7 +173,7 @@ TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0 ...@@ -173,7 +173,7 @@ TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0
TEXT runtime·mstart_nacl(SB),NOSPLIT,$0 TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
NACL_SYSCALL(SYS_tls_get) NACL_SYSCALL(SYS_tls_get)
SUBL $8, AX SUBL $8, AX
MOVL AX, GS MOVL AX, TLS
JMP runtime·mstart(SB) JMP runtime·mstart(SB)
TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0 TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0
...@@ -254,12 +254,12 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$80 ...@@ -254,12 +254,12 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$80
// restore TLS register at time of execution, // restore TLS register at time of execution,
// in case it's been smashed. // in case it's been smashed.
// the TLS register is really BP, but for consistency // the TLS register is really BP, but for consistency
// with non-NaCl systems it is referred to here as GS. // with non-NaCl systems it is referred to here as TLS.
// NOTE: Cannot use SYS_tls_get here (like we do in mstart_nacl), // NOTE: Cannot use SYS_tls_get here (like we do in mstart_nacl),
// because the main thread never calls tls_set. // because the main thread never calls tls_set.
LEAL ctxt+0(FP), AX LEAL ctxt+0(FP), AX
MOVL (16*4+5*8)(AX), AX MOVL (16*4+5*8)(AX), AX
MOVL AX, GS MOVL AX, TLS
// check that m exists // check that m exists
get_tls(CX) get_tls(CX)
...@@ -305,7 +305,7 @@ sigtramp_ret: ...@@ -305,7 +305,7 @@ sigtramp_ret:
MOVQ 16(SI), DX MOVQ 16(SI), DX
MOVQ 24(SI), BX MOVQ 24(SI), BX
MOVL 32(SI), SP // MOVL for SP sandboxing MOVL 32(SI), SP // MOVL for SP sandboxing
// 40(SI) is saved BP aka GS, already restored above // 40(SI) is saved BP aka TLS, already restored above
// 48(SI) is saved SI, never to be seen again // 48(SI) is saved SI, never to be seen again
MOVQ 56(SI), DI MOVQ 56(SI), DI
MOVQ 64(SI), R8 MOVQ 64(SI), R8
......
...@@ -100,8 +100,9 @@ TEXT runtime·rfork(SB),NOSPLIT,$0 ...@@ -100,8 +100,9 @@ TEXT runtime·rfork(SB),NOSPLIT,$0
MOVL DX, g(AX) MOVL DX, g(AX)
MOVL BX, m(AX) MOVL BX, m(AX)
// Initialize AX from TOS struct. // Initialize procid from TOS struct.
MOVL procid(AX), AX // TODO: Be explicit and insert a new MOVL _tos(SB), AX here.
MOVL 48(AX), AX // procid
MOVL AX, m_procid(BX) // save pid as m->procid MOVL AX, m_procid(BX) // save pid as m->procid
CALL runtime·stackcheck(SB) // smashes AX, CX CALL runtime·stackcheck(SB) // smashes AX, CX
......
...@@ -136,7 +136,7 @@ TEXT runtime·rfork(SB),NOSPLIT,$0 ...@@ -136,7 +136,7 @@ TEXT runtime·rfork(SB),NOSPLIT,$0
MOVQ BX, m(AX) MOVQ BX, m(AX)
// Initialize AX from pid in TLS. // Initialize AX from pid in TLS.
MOVQ procid(AX), AX MOVQ 0(FS), AX
MOVQ AX, m_procid(BX) // save pid as m->procid MOVQ AX, m_procid(BX) // save pid as m->procid
CALL runtime·stackcheck(SB) // smashes AX, CX CALL runtime·stackcheck(SB) // smashes AX, CX
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment