Commit b3a854c7 authored by Keith Randall's avatar Keith Randall

runtime: use fixed TLS offsets on darwin/amd64 and darwin/386

Fixes #23617

Note that this CL does not affect darwin/arm and darwin/arm64,
still TBD what, if anything, needs to be done for those.

This is a fix of CL 105975 which was reverted in CL 106155.
Needed to use movl instead of movq for 386.

Change-Id: I0db7f8087173869e60cc22c6c3124fa0a0739b46
Reviewed-on: https://go-review.googlesource.com/106156
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 3d501df4
...@@ -113,24 +113,30 @@ func (ctxt *Link) computeTLSOffset() { ...@@ -113,24 +113,30 @@ func (ctxt *Link) computeTLSOffset() {
/* /*
* OS X system constants - offset from 0(GS) to our TLS. * OS X system constants - offset from 0(GS) to our TLS.
* Explained in src/runtime/cgo/gcc_darwin_*.c.
*/ */
case objabi.Hdarwin: case objabi.Hdarwin:
switch ctxt.Arch.Family { switch ctxt.Arch.Family {
default: default:
log.Fatalf("unknown thread-local storage offset for darwin/%s", ctxt.Arch.Name) log.Fatalf("unknown thread-local storage offset for darwin/%s", ctxt.Arch.Name)
case sys.ARM: /*
ctxt.Tlsoffset = 0 // dummy value, not needed * For x86, Apple has reserved a slot in the TLS for Go. See issue 23617.
* That slot is at offset 0x30 on amd64, and 0x18 on 386.
* The slot will hold the G pointer.
* These constants should match those in runtime/sys_darwin_{386,amd64}.s
* and runtime/cgo/gcc_darwin_{386,amd64}.c.
*/
case sys.I386:
ctxt.Tlsoffset = 0x18
case sys.AMD64: case sys.AMD64:
ctxt.Tlsoffset = 0x8a0 ctxt.Tlsoffset = 0x30
case sys.ARM64: case sys.ARM:
ctxt.Tlsoffset = 0 // dummy value, not needed ctxt.Tlsoffset = 0 // dummy value, not needed
case sys.I386: case sys.ARM64:
ctxt.Tlsoffset = 0x468 ctxt.Tlsoffset = 0 // dummy value, not needed
} }
} }
......
...@@ -9,89 +9,6 @@ ...@@ -9,89 +9,6 @@
#include "libcgo_unix.h" #include "libcgo_unix.h"
static void* threadentry(void*); static void* threadentry(void*);
static pthread_key_t k1;
#define magic1 (0x23581321U)
static void
inittls(void)
{
uint32 x;
pthread_key_t tofree[128], k;
int i, ntofree;
/*
* Allocate thread-local storage slot for g.
* The key numbers start at 0x100, and we expect to be
* one of the early calls to pthread_key_create, so we
* should be able to get a pretty low number.
*
* In Darwin/386 pthreads, %gs points at the thread
* structure, and each key is an index into the thread-local
* storage array that begins at offset 0x48 within in that structure.
* It may happen that we are not quite the first function to try
* to allocate thread-local storage keys, so instead of depending
* on getting 0x100, we try for 0x108, allocating keys until
* we get the one we want and then freeing the ones we didn't want.
*
* Thus the final offset to use in %gs references is
* 0x48+4*0x108 = 0x468.
*
* The linker and runtime hard-code this constant offset
* from %gs where we expect to find g.
* Known to src/cmd/link/internal/ld/sym.go:/0x468
* and to src/runtime/sys_darwin_386.s:/0x468
*
* This is truly disgusting and a bit fragile, but taking care
* of it here protects the rest of the system from damage.
* The alternative would be to use a global variable that
* held the offset and refer to that variable each time we
* need a %gs variable (g). That approach would
* require an extra instruction and memory reference in
* every stack growth prolog and would also require
* rewriting the code that 8c generates for extern registers.
*
* Things get more disgusting on OS X 10.7 Lion.
* The 0x48 base mentioned above is the offset of the tsd
* array within the per-thread structure on Leopard and Snow Leopard.
* On Lion, the base moved a little, so while the math above
* still applies, the base is different. Thus, we cannot
* look for specific key values if we want to build binaries
* that run on both systems. Instead, forget about the
* specific key values and just allocate and initialize per-thread
* storage until we find a key that writes to the memory location
* we want. Then keep that key.
*/
ntofree = 0;
for(;;) {
if(pthread_key_create(&k, nil) != 0) {
fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
abort();
}
pthread_setspecific(k, (void*)magic1);
asm volatile("movl %%gs:0x468, %0" : "=r"(x));
pthread_setspecific(k, 0);
if(x == magic1) {
k1 = k;
break;
}
if(ntofree >= nelem(tofree)) {
fprintf(stderr, "runtime/cgo: could not obtain pthread_keys\n");
fprintf(stderr, "\ttried");
for(i=0; i<ntofree; i++)
fprintf(stderr, " %#x", (unsigned)tofree[i]);
fprintf(stderr, "\n");
abort();
}
tofree[ntofree++] = k;
}
/*
* We got the key we wanted. Free the others.
*/
for(i=0; i<ntofree; i++)
pthread_key_delete(tofree[i]);
}
void void
x_cgo_init(G *g) x_cgo_init(G *g)
...@@ -103,8 +20,6 @@ x_cgo_init(G *g) ...@@ -103,8 +20,6 @@ x_cgo_init(G *g)
pthread_attr_getstacksize(&attr, &size); pthread_attr_getstacksize(&attr, &size);
g->stacklo = (uintptr)&attr - size + 4096; g->stacklo = (uintptr)&attr - size + 4096;
pthread_attr_destroy(&attr); pthread_attr_destroy(&attr);
inittls();
} }
...@@ -142,10 +57,9 @@ threadentry(void *v) ...@@ -142,10 +57,9 @@ threadentry(void *v)
ts = *(ThreadStart*)v; ts = *(ThreadStart*)v;
free(v); free(v);
if (pthread_setspecific(k1, (void*)ts.g) != 0) { // Move the g pointer into the slot reserved in thread local storage.
fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n"); // Constant must match the one in cmd/link/internal/ld/sym.go.
abort(); asm volatile("movl %0, %%gs:0x18" :: "r"(ts.g));
}
crosscall_386(ts.fn); crosscall_386(ts.fn);
return nil; return nil;
......
...@@ -9,60 +9,6 @@ ...@@ -9,60 +9,6 @@
#include "libcgo_unix.h" #include "libcgo_unix.h"
static void* threadentry(void*); static void* threadentry(void*);
static pthread_key_t k1;
#define magic1 (0x23581321345589ULL)
static void
inittls(void)
{
uint64 x;
pthread_key_t tofree[128], k;
int i, ntofree;
/*
* Same logic, code as darwin_386.c:/inittls, except that words
* are 8 bytes long now, and the thread-local storage starts
* at 0x60 on Leopard / Snow Leopard. So the offset is
* 0x60+8*0x108 = 0x8a0.
*
* The linker and runtime hard-code this constant offset
* from %gs where we expect to find g.
* Known to src/cmd/link/internal/ld/sym.go:/0x8a0
* and to src/runtime/sys_darwin_amd64.s:/0x8a0
*
* As disgusting as on the 386; same justification.
*/
ntofree = 0;
for(;;) {
if(pthread_key_create(&k, nil) != 0) {
fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
abort();
}
pthread_setspecific(k, (void*)magic1);
asm volatile("movq %%gs:0x8a0, %0" : "=r"(x));
pthread_setspecific(k, 0);
if(x == magic1) {
k1 = k;
break;
}
if(ntofree >= nelem(tofree)) {
fprintf(stderr, "runtime/cgo: could not obtain pthread_keys\n");
fprintf(stderr, "\ttried");
for(i=0; i<ntofree; i++)
fprintf(stderr, " %#x", (unsigned)tofree[i]);
fprintf(stderr, "\n");
abort();
}
tofree[ntofree++] = k;
}
/*
* We got the key we wanted. Free the others.
*/
for(i=0; i<ntofree; i++)
pthread_key_delete(tofree[i]);
}
void void
x_cgo_init(G *g) x_cgo_init(G *g)
...@@ -74,8 +20,6 @@ x_cgo_init(G *g) ...@@ -74,8 +20,6 @@ x_cgo_init(G *g)
pthread_attr_getstacksize(&attr, &size); pthread_attr_getstacksize(&attr, &size);
g->stacklo = (uintptr)&attr - size + 4096; g->stacklo = (uintptr)&attr - size + 4096;
pthread_attr_destroy(&attr); pthread_attr_destroy(&attr);
inittls();
} }
...@@ -113,10 +57,9 @@ threadentry(void *v) ...@@ -113,10 +57,9 @@ threadentry(void *v)
ts = *(ThreadStart*)v; ts = *(ThreadStart*)v;
free(v); free(v);
if (pthread_setspecific(k1, (void*)ts.g) != 0) { // Move the g pointer into the slot reserved in thread local storage.
fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n"); // Constant must match the one in cmd/link/internal/ld/sym.go.
abort(); asm volatile("movq %0, %%gs:0x30" :: "r"(ts.g));
}
crosscall_amd64(ts.fn); crosscall_amd64(ts.fn);
return nil; return nil;
......
...@@ -525,15 +525,14 @@ TEXT runtime·setldt(SB),NOSPLIT,$32 ...@@ -525,15 +525,14 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
* When linking against the system libraries, * When linking against the system libraries,
* we use its pthread_create and let it set up %gs * we use its pthread_create and let it set up %gs
* for us. When we do that, the private storage * for us. When we do that, the private storage
* we get is not at 0(GS) but at 0x468(GS). * we get is not at 0(GS) but at 0x18(GS).
* 8l rewrites 0(TLS) into 0x468(GS) for us. * The linker rewrites 0(TLS) into 0x18(GS) for us.
* To accommodate that rewrite, we translate the * To accommodate that rewrite, we translate the
* address and limit here so that 0x468(GS) maps to 0(address). * address here so that 0x18(GS) maps to 0(address).
* *
* See cgo/gcc_darwin_386.c:/468 for the derivation * Constant must match the one in cmd/link/internal/ld/sym.go.
* of the constant.
*/ */
SUBL $0x468, BX SUBL $0x18, BX
/* /*
* Must set up as USER_CTHREAD segment because * Must set up as USER_CTHREAD segment because
......
...@@ -569,11 +569,10 @@ TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0 ...@@ -569,11 +569,10 @@ TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0
// set tls base to DI // set tls base to DI
TEXT runtime·settls(SB),NOSPLIT,$32 TEXT runtime·settls(SB),NOSPLIT,$32
/* /*
* Same as in sys_darwin_386.s:/ugliness, different constant. * Same as in sys_darwin_386.s, but a different constant.
* See cgo/gcc_darwin_amd64.c for the derivation * Constant must match the one in cmd/link/internal/ld/sym.go.
* of the constant.
*/ */
SUBQ $0x8a0, DI SUBQ $0x30, DI
MOVL $(0x3000000+3), AX // thread_fast_set_cthread_self - machdep call #3 MOVL $(0x3000000+3), AX // thread_fast_set_cthread_self - machdep call #3
SYSCALL SYSCALL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment