Commit d8c0bbd8 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Change stack crawling to not be based on unwinding

We always want to crawl the entire stack, and it's
possible to determine the extents of the stack, so just
do a scan over the entire memory range.

Also, change the way the interpreter keeps track of its roots;
we don't really need to associate the roots with a specific
interpreter frame.

This should hopefully clear up the weirdness about libunwind
trying to unwind through the pthreads assembly code, and potentially
also make stack crawling faster.
parent 9adb8f17
...@@ -66,8 +66,8 @@ git clone git://git.sv.gnu.org/libunwind.git libunwind-trunk ...@@ -66,8 +66,8 @@ git clone git://git.sv.gnu.org/libunwind.git libunwind-trunk
mkdir libunwind-trunk-install mkdir libunwind-trunk-install
cd libunwind-trunk cd libunwind-trunk
git checkout 65ac867416 git checkout 65ac867416
# disable shared libraries because we'll be installing this in a place that the loader can't find it:
autoreconf -i autoreconf -i
# disable shared libraries because we'll be installing this in a place that the loader can't find it:
./configure --prefix=$HOME/pyston_deps/libunwind-trunk-install --enable-shared=0 ./configure --prefix=$HOME/pyston_deps/libunwind-trunk-install --enable-shared=0
make -j4 make -j4
make install make install
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "codegen/irgen/util.h" #include "codegen/irgen/util.h"
#include "core/common.h" #include "core/common.h"
#include "core/stats.h" #include "core/stats.h"
#include "core/thread_utils.h"
#include "core/util.h" #include "core/util.h"
//#undef VERBOSITY //#undef VERBOSITY
...@@ -206,25 +207,23 @@ static void set(SymMap& symbols, const llvm::BasicBlock::iterator& it, Val v) { ...@@ -206,25 +207,23 @@ static void set(SymMap& symbols, const llvm::BasicBlock::iterator& it, Val v) {
//#define SET(v) symbols.insert(std::make_pair(static_cast<llvm::Value*>(&(*it)), Val(v))) //#define SET(v) symbols.insert(std::make_pair(static_cast<llvm::Value*>(&(*it)), Val(v)))
} }
static std::unordered_map<void*, const SymMap*> interpreter_roots;
static std::unordered_map<void*, llvm::Instruction*> cur_instruction_map; static std::unordered_map<void*, llvm::Instruction*> cur_instruction_map;
void gatherInterpreterRootsForFrame(GCVisitor* visitor, void* frame_ptr) { typedef std::vector<const SymMap*> root_stack_t;
auto it = interpreter_roots.find(frame_ptr); threading::PerThreadSet<root_stack_t> root_stack_set;
if (it == interpreter_roots.end()) { threading::PerThread<root_stack_t> thread_local root_stack(&root_stack_set);
printf("%p is not an interpreter frame; they are", frame_ptr);
for (const auto& p2 : interpreter_roots) {
printf(" %p", p2.first);
}
printf("\n");
abort();
}
// printf("Gathering roots for frame %p\n", frame_ptr); void gatherInterpreterRoots(GCVisitor* visitor) {
const SymMap* symbols = it->second; // In theory this lock should be superfluous since we should only call this
// inside a sequential section, but lock it anyway:
threading::LockedRegion _lock(&root_stack_set.lock);
for (const auto& p2 : *symbols) { for (auto& p : root_stack_set.map) {
visitor->visitPotential(p2.second.o); for (const SymMap* sym_map : *p.second) {
for (const auto& p2 : *sym_map) {
visitor->visitPotential(p2.second.o);
}
}
} }
} }
...@@ -236,8 +235,7 @@ public: ...@@ -236,8 +235,7 @@ public:
constexpr UnregisterHelper(void* frame_ptr) : frame_ptr(frame_ptr) {} constexpr UnregisterHelper(void* frame_ptr) : frame_ptr(frame_ptr) {}
~UnregisterHelper() { ~UnregisterHelper() {
assert(interpreter_roots.count(frame_ptr)); root_stack.value.pop_back();
interpreter_roots.erase(frame_ptr);
assert(cur_instruction_map.count(frame_ptr)); assert(cur_instruction_map.count(frame_ptr));
cur_instruction_map.erase(frame_ptr); cur_instruction_map.erase(frame_ptr);
...@@ -282,7 +280,7 @@ Box* interpretFunction(llvm::Function* f, int nargs, Box* arg1, Box* arg2, Box* ...@@ -282,7 +280,7 @@ Box* interpretFunction(llvm::Function* f, int nargs, Box* arg1, Box* arg2, Box*
SymMap symbols; SymMap symbols;
void* frame_ptr = __builtin_frame_address(0); void* frame_ptr = __builtin_frame_address(0);
interpreter_roots[frame_ptr] = &symbols; root_stack.value.push_back(&symbols);
UnregisterHelper helper(frame_ptr); UnregisterHelper helper(frame_ptr);
int arg_num = -1; int arg_num = -1;
......
...@@ -27,7 +27,7 @@ class LineInfo; ...@@ -27,7 +27,7 @@ class LineInfo;
Box* interpretFunction(llvm::Function* f, int nargs, Box* arg1, Box* arg2, Box* arg3, Box** args); Box* interpretFunction(llvm::Function* f, int nargs, Box* arg1, Box* arg2, Box* arg3, Box** args);
void gatherInterpreterRootsForFrame(GCVisitor* visitor, void* frame_ptr); void gatherInterpreterRoots(GCVisitor* visitor);
const LineInfo* getLineInfoForInterpretedFrame(void* frame_ptr); const LineInfo* getLineInfoForInterpretedFrame(void* frame_ptr);
} }
......
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#define STACK_GROWS_DOWN 1
#define _STRINGIFY(N) #N #define _STRINGIFY(N) #N
#define STRINGIFY(N) _STRINGIFY(N) #define STRINGIFY(N) _STRINGIFY(N)
......
// Copyright (c) 2014 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYSTON_CORE_THREADUTILS_H
#define PYSTON_CORE_THREADUTILS_H
#include <pthread.h>

#include <cassert>
#include <unordered_map>
namespace pyston {
namespace threading {
// Scoped guard for a pthread mutex: the constructor locks the mutex and the
// destructor unlocks it, so the mutex is held for exactly the lifetime of the
// guard object.
//
// The guard stores only a pointer; the mutex itself must outlive the guard.
class LockedRegion {
private:
    pthread_mutex_t* mutex;

public:
    // Blocks until `mutex` has been acquired by the calling thread.
    explicit LockedRegion(pthread_mutex_t* mutex) : mutex(mutex) { pthread_mutex_lock(mutex); }

    // Releases the mutex acquired by the constructor.
    ~LockedRegion() { pthread_mutex_unlock(mutex); }

    // Not copyable: a copy would hold the same pointer and its destructor would
    // unlock the mutex a second time.
    LockedRegion(const LockedRegion&) = delete;
    LockedRegion& operator=(const LockedRegion&) = delete;
};
// Table of per-thread T objects, keyed by the pthread id of the thread that
// registered them. The table stores plain pointers and does not own the
// pointed-to objects.
template <typename T> class PerThreadSet {
public:
    // Must be held while reading or modifying `map`.
    pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    // pthread id -> address of the T registered for that thread.
    std::unordered_map<pthread_t, T*> map;

    PerThreadSet() = default;

    // Not copyable: a copied pthread_mutex_t is not a usable mutex, and a copy
    // of the table would not see later changes made through the original set.
    PerThreadSet(const PerThreadSet&) = delete;
    PerThreadSet& operator=(const PerThreadSet&) = delete;
};
// Holds one T and keeps it listed in a PerThreadSet<T> for as long as this
// object is alive. The entry is keyed by the pthread id of the thread that ran
// the constructor, so other code can reach `value` through the set (while
// holding the set's lock).
//
// The set is referenced by pointer and must outlive this object.
template <typename T> class PerThread {
private:
    PerThreadSet<T>* set;
    // Id of the constructing thread; also the key used to unregister.
    pthread_t self;

public:
    T value;

    // Registers &value in `set` under the calling thread's id. An existing
    // entry for the same thread id is overwritten.
    explicit PerThread(PerThreadSet<T>* set) : set(set), self(pthread_self()) {
        LockedRegion _lock(&set->lock);
        set->map[self] = &value;
    }

    // Removes the entry before `value` is destroyed (the destructor body runs
    // before member destruction), so the set never exposes a dead object.
    ~PerThread() {
        LockedRegion _lock(&set->lock);
        assert(set->map.count(self) == 1);
        set->map.erase(self);
    }

    // Not copyable: a copy would share this object's set entry without being
    // the object it points to, and the second destructor to run would find the
    // entry already erased.
    PerThread(const PerThread&) = delete;
    PerThread& operator=(const PerThread&) = delete;
};
} // namespace threading
} // namespace pyston
#endif
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "core/common.h" #include "core/common.h"
#include "core/options.h" #include "core/options.h"
#include "core/thread_utils.h"
extern "C" int start_thread(void* arg); extern "C" int start_thread(void* arg);
...@@ -39,17 +40,6 @@ int tgkill(int tgid, int tid, int sig) { ...@@ -39,17 +40,6 @@ int tgkill(int tgid, int tid, int sig) {
return syscall(SYS_tgkill, tgid, tid, sig); return syscall(SYS_tgkill, tgid, tid, sig);
} }
class LockedRegion {
private:
pthread_mutex_t* mutex;
public:
LockedRegion(pthread_mutex_t* mutex) : mutex(mutex) { pthread_mutex_lock(mutex); }
~LockedRegion() { pthread_mutex_unlock(mutex); }
};
// Certain thread examination functions won't be valid for a brief // Certain thread examination functions won't be valid for a brief
// period while a thread is starting up. // period while a thread is starting up.
// To handle this, track the number of threads in an uninitialized state, // To handle this, track the number of threads in an uninitialized state,
...@@ -62,9 +52,18 @@ struct ThreadStartArgs { ...@@ -62,9 +52,18 @@ struct ThreadStartArgs {
}; };
static pthread_mutex_t threading_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t threading_lock = PTHREAD_MUTEX_INITIALIZER;
static std::unordered_set<pid_t> current_threads; struct ThreadInfo {
// "bottom" in the sense of a stack, which in a down-growing stack is the highest address:
void* stack_bottom;
pthread_t pthread_id;
};
static std::unordered_map<pid_t, ThreadInfo> current_threads;
static std::atomic<int> signals_waiting(0); void* getStackBottom() {
return current_threads[gettid()].stack_bottom;
}
static int signals_waiting(0);
static std::vector<ThreadState> thread_states; static std::vector<ThreadState> thread_states;
std::vector<ThreadState> getAllThreadStates() { std::vector<ThreadState> getAllThreadStates() {
// TODO need to prevent new threads from starting, // TODO need to prevent new threads from starting,
...@@ -89,7 +88,8 @@ std::vector<ThreadState> getAllThreadStates() { ...@@ -89,7 +88,8 @@ std::vector<ThreadState> getAllThreadStates() {
pid_t tgid = getpid(); pid_t tgid = getpid();
pid_t mytid = gettid(); pid_t mytid = gettid();
for (pid_t tid : current_threads) { for (auto& pair : current_threads) {
pid_t tid = pair.first;
if (tid == mytid) if (tid == mytid)
continue; continue;
tgkill(tgid, tid, SIGUSR2); tgkill(tgid, tid, SIGUSR2);
...@@ -119,12 +119,19 @@ static void _thread_context_dump(int signum, siginfo_t* info, void* _context) { ...@@ -119,12 +119,19 @@ static void _thread_context_dump(int signum, siginfo_t* info, void* _context) {
printf("old rip: 0x%lx\n", context->uc_mcontext.gregs[REG_RIP]); printf("old rip: 0x%lx\n", context->uc_mcontext.gregs[REG_RIP]);
} }
thread_states.push_back(ThreadState(tid, context)); #if STACK_GROWS_DOWN
signals_waiting--; // atomic on std::atomic void* stack_start = (void*)context->uc_mcontext.gregs[REG_RSP];
void* stack_end = current_threads[tid].stack_bottom;
#else
void* stack_start = current_threads[tid].stack_bottom;
void* stack_end = (void*)(context->uc_mcontext.gregs[REG_RSP] + sizeof(void*));
#endif
assert(stack_start < stack_end);
thread_states.push_back(ThreadState(tid, context, stack_start, stack_end));
signals_waiting--;
} }
static void* _thread_start(void* _arg) { static void* _thread_start(void* _arg) {
pid_t tid = gettid();
ThreadStartArgs* arg = static_cast<ThreadStartArgs*>(_arg); ThreadStartArgs* arg = static_cast<ThreadStartArgs*>(_arg);
auto start_func = arg->start_func; auto start_func = arg->start_func;
Box* arg1 = arg->arg1; Box* arg1 = arg->arg1;
...@@ -135,11 +142,33 @@ static void* _thread_start(void* _arg) { ...@@ -135,11 +142,33 @@ static void* _thread_start(void* _arg) {
{ {
LockedRegion _lock(&threading_lock); LockedRegion _lock(&threading_lock);
current_threads.insert(tid); pid_t tid = gettid();
pthread_t current_thread = pthread_self();
pthread_attr_t thread_attrs;
int code = pthread_getattr_np(current_thread, &thread_attrs);
RELEASE_ASSERT(code == 0, "");
void* stack_start;
size_t stack_size;
code = pthread_attr_getstack(&thread_attrs, &stack_start, &stack_size);
RELEASE_ASSERT(code == 0, "");
pthread_attr_destroy(&thread_attrs);
current_threads[tid] = ThreadInfo {
#if STACK_GROWS_DOWN
.stack_bottom = static_cast<char*>(stack_start) + stack_size,
#else
.stack_bottom = stack_start,
#endif
.pthread_id = current_thread,
};
num_starting_threads--; num_starting_threads--;
if (VERBOSITY() >= 2) if (VERBOSITY() >= 2)
printf("child initialized; tid=%d\n", tid); printf("child initialized; tid=%d\n", gettid());
} }
threading::GLReadRegion _glock; threading::GLReadRegion _glock;
...@@ -149,9 +178,9 @@ static void* _thread_start(void* _arg) { ...@@ -149,9 +178,9 @@ static void* _thread_start(void* _arg) {
{ {
LockedRegion _lock(&threading_lock); LockedRegion _lock(&threading_lock);
current_threads.erase(tid); current_threads.erase(gettid());
if (VERBOSITY() >= 2) if (VERBOSITY() >= 2)
printf("thread tid=%d exited\n", tid); printf("thread tid=%d exited\n", gettid());
} }
return rtn; return rtn;
...@@ -167,7 +196,7 @@ intptr_t start_thread(void* (*start_func)(Box*, Box*, Box*), Box* arg1, Box* arg ...@@ -167,7 +196,7 @@ intptr_t start_thread(void* (*start_func)(Box*, Box*, Box*), Box* arg1, Box* arg
pthread_t thread_id; pthread_t thread_id;
int code = pthread_create(&thread_id, NULL, &_thread_start, args); int code = pthread_create(&thread_id, NULL, &_thread_start, args);
assert(code == 0); RELEASE_ASSERT(code == 0, "");
if (VERBOSITY() >= 2) if (VERBOSITY() >= 2)
printf("pthread thread_id: 0x%lx\n", thread_id); printf("pthread thread_id: 0x%lx\n", thread_id);
...@@ -175,6 +204,50 @@ intptr_t start_thread(void* (*start_func)(Box*, Box*, Box*), Box* arg1, Box* arg ...@@ -175,6 +204,50 @@ intptr_t start_thread(void* (*start_func)(Box*, Box*, Box*), Box* arg1, Box* arg
return thread_id; return thread_id;
} }
// from https://www.sourceware.org/ml/guile/2000-07/msg00214.html
//
// Finds the memory mapping that contains the calling thread's stack by
// scanning /proc/self/maps for the range holding the address of a local
// variable.
//
// Returns the "bottom" of that mapping in the stack sense: its end address
// when STACK_GROWS_DOWN is set, otherwise its start address. Returns NULL if
// /proc/self/maps cannot be opened, a line has no '-' separator, or no mapping
// contains the probe address.
static void* find_stack() {
    FILE* input = fopen("/proc/self/maps", "r");
    if (input == NULL)
        return NULL;

    // Only the address of this local is used; it is guaranteed to lie within
    // the current thread's stack.
    int dummy;
    void* const probe = (void*)&dummy;

    void* result = NULL;
    char* line = NULL; // buffer is allocated and grown by getline()
    size_t len = 0;

    while (getline(&line, &len, input) != -1) {
        // Every entry begins with "<start>-<end>" in hex.
        char* sep = strchr(line, '-');
        if (sep == NULL)
            break; // unexpected format: give up, but still release resources below

        *sep++ = '\0';
        void* start = (void*)strtoul(line, NULL, 16);
        void* end = (void*)strtoul(sep, NULL, 16);

        // Mappings are half-open [start, end): `end` already belongs to the
        // next mapping, so it must not count as a match.
        if (probe >= start && probe < end) {
#if STACK_GROWS_DOWN
            result = end;
#else
            result = start;
#endif
            break;
        }
    }

    // Single exit path so the line buffer and the file are released on every
    // outcome, including the malformed-line case.
    free(line);
    fclose(input);
    return result; /* NULL if not found =^P */
}
intptr_t call_frame_base; intptr_t call_frame_base;
void registerMainThread() { void registerMainThread() {
LockedRegion _lock(&threading_lock); LockedRegion _lock(&threading_lock);
...@@ -184,7 +257,9 @@ void registerMainThread() { ...@@ -184,7 +257,9 @@ void registerMainThread() {
// call_frame_base = (intptr_t)::start_thread; // call_frame_base = (intptr_t)::start_thread;
call_frame_base = (intptr_t)_thread_start; call_frame_base = (intptr_t)_thread_start;
current_threads.insert(gettid()); current_threads[gettid()] = ThreadInfo{
.stack_bottom = find_stack(), .pthread_id = pthread_self(),
};
struct sigaction act; struct sigaction act;
act.sa_flags = SA_SIGINFO; act.sa_flags = SA_SIGINFO;
......
...@@ -37,7 +37,10 @@ struct ThreadState { ...@@ -37,7 +37,10 @@ struct ThreadState {
pid_t tid; // useful mostly for debugging pid_t tid; // useful mostly for debugging
ucontext_t ucontext; ucontext_t ucontext;
ThreadState(pid_t tid, ucontext_t* ucontext) : tid(tid) { void* stack_start, *stack_end;
ThreadState(pid_t tid, ucontext_t* ucontext, void* stack_start, void* stack_end)
: tid(tid), stack_start(stack_start), stack_end(stack_end) {
memcpy(&this->ucontext, ucontext, sizeof(ucontext_t)); memcpy(&this->ucontext, ucontext, sizeof(ucontext_t));
this->ucontext.uc_mcontext.fpregs = &this->ucontext.__fpregs_mem; this->ucontext.uc_mcontext.fpregs = &this->ucontext.__fpregs_mem;
} }
...@@ -47,6 +50,10 @@ struct ThreadState { ...@@ -47,6 +50,10 @@ struct ThreadState {
// as a corollary, this thread is very much not thread safe. // as a corollary, this thread is very much not thread safe.
std::vector<ThreadState> getAllThreadStates(); std::vector<ThreadState> getAllThreadStates();
// Get the stack "bottom" (first pushed data; for stacks that grow down, will
// be the highest address).
void* getStackBottom();
#define THREADING_USE_GIL 1 #define THREADING_USE_GIL 1
#define THREADING_USE_GRWL 0 #define THREADING_USE_GRWL 0
#define THREADING_SAFE_DATASTRUCTURES THREADING_USE_GRWL #define THREADING_SAFE_DATASTRUCTURES THREADING_USE_GRWL
......
...@@ -14,14 +14,11 @@ ...@@ -14,14 +14,11 @@
#include "gc/root_finder.h" #include "gc/root_finder.h"
#define UNW_LOCAL_ONLY #include <cassert>
#include <libunwind.h>
#include <cstring>
#include <setjmp.h>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cassert> #include <cstring>
#include <setjmp.h>
#include <vector> #include <vector>
#include "codegen/codegen.h" #include "codegen/codegen.h"
...@@ -43,129 +40,46 @@ namespace gc { ...@@ -43,129 +40,46 @@ namespace gc {
void collectRoots(void* start, void* end, TraceStack* stack) { void collectRoots(void* start, void* end, TraceStack* stack) {
assert(start <= end); assert(start <= end);
void** cur = (void**)start; TraceStackGCVisitor(stack).visitPotentialRange((void**)start, (void**)end);
while (cur < end) {
void* p = global_heap.getAllocationFromInteriorPointer(*cur);
if (p)
stack->push(p);
cur++;
}
} }
static void _unwindStack(unw_cursor_t* cursor, TraceStack* stack) {
TraceStackGCVisitor visitor(stack);
unw_word_t ip, sp, bp;
#ifndef NVALGRIND
if (RUNNING_ON_VALGRIND) {
memset(&ip, 0, sizeof(ip));
memset(&sp, 0, sizeof(sp));
memset(&bp, 0, sizeof(bp));
}
#endif
int code;
while (true) {
int code = unw_step(cursor);
// Negative codes are errors, zero means that there isn't a new frame.
RELEASE_ASSERT(code >= 0 && "something broke unwinding!", "%d '%s'", code, unw_strerror(code));
RELEASE_ASSERT(code != 0, "didn't get to the top of the stack!");
unw_get_reg(cursor, UNW_REG_IP, &ip);
unw_get_reg(cursor, UNW_REG_SP, &sp);
unw_get_reg(cursor, UNW_TDEP_BP, &bp);
void* cur_sp = (void*)sp;
void* cur_bp = (void*)bp;
// std::string name = g.func_addr_registry.getFuncNameAtAddress((void*)ip, true);
unw_proc_info_t pip;
unw_get_proc_info(cursor, &pip);
// if (VERBOSITY()) printf("ip = 0x%lx (start_ip = 0x%lx), stack = [%p, %p)\n", (long) ip, pip.start_ip, cur_sp,
// cur_bp);
if (pip.start_ip == (uintptr_t)&__libc_start_main) {
break;
}
if (pip.start_ip == (intptr_t)interpretFunction) {
// TODO Do we still need to crawl the interpreter itself?
gatherInterpreterRootsForFrame(&visitor, cur_bp);
}
collectRoots(cur_sp, (char*)cur_bp, stack);
if (pip.start_ip == threading::call_frame_base) {
break;
}
if (cur_bp == NULL) {
// TODO I think this indicates an unwind mistake by libunwind? Not sure.
// But if it returns cur_bp=NULL, this is probably just a thread where libunwind
// didn't reconstruct the call stack exactly the way we thought.
// TODO we probably don't need to do any unwinding here at all; we can just track
// the stack min and max for every thread.
break;
}
}
}
void collectOtherThreadsStacks(TraceStack* stack) { void collectOtherThreadsStacks(TraceStack* stack) {
std::vector<threading::ThreadState> threads = threading::getAllThreadStates(); std::vector<threading::ThreadState> threads = threading::getAllThreadStates();
// unw_addr_space_t as = getOtherAddrSpace();
for (threading::ThreadState& tstate : threads) { for (threading::ThreadState& tstate : threads) {
unw_cursor_t cursor; collectRoots(tstate.stack_start, tstate.stack_end, stack);
// int code = unw_init_remote(&cursor, as, &tstate);
int code = unw_init_local(&cursor, (ucontext_t*)&tstate.ucontext);
assert(code == 0);
// printf("Collecting thread %d\n", tstate.tid);
collectRoots(&tstate.ucontext, (&tstate.ucontext) + 1, stack); collectRoots(&tstate.ucontext, (&tstate.ucontext) + 1, stack);
_unwindStack(&cursor, stack);
} }
} }
static void collectLocalStack(TraceStack* stack) { static void collectLocalStack(TraceStack* stack) {
unw_cursor_t cursor;
unw_context_t uc;
// force callee-save registers onto the stack: // force callee-save registers onto the stack:
// Actually, I feel like this is pretty brittle: // Actually, I feel like this is pretty brittle:
// collectLocalStack itself is allowed to save the callee-save registers // collectLocalStack itself is allowed to save the callee-save registers
// on its own stack. // on its own stack.
jmp_buf registers __attribute__((aligned(sizeof(void*)))); jmp_buf registers __attribute__((aligned(sizeof(void*))));
#ifndef NVALGRIND
if (RUNNING_ON_VALGRIND) {
memset(&registers, 0, sizeof(registers));
memset(&cursor, 0, sizeof(cursor));
memset(&uc, 0, sizeof(uc));
}
#endif
setjmp(registers); setjmp(registers);
assert(sizeof(registers) % 8 == 0); assert(sizeof(registers) % 8 == 0);
// void* stack_bottom = __builtin_frame_address(0); // void* stack_bottom = __builtin_frame_address(0);
collectRoots(&registers, &registers + 1, stack); collectRoots(&registers, (&registers) + 1, stack);
unw_getcontext(&uc);
unw_init_local(&cursor, &uc);
_unwindStack(&cursor, stack); void* stack_bottom = threading::getStackBottom();
#if STACK_GROWS_DOWN
collectRoots(&registers, stack_bottom, stack);
#else
collectRoots(stack_bottom, &registers + 1, stack);
#endif
} }
void collectStackRoots(TraceStack* stack) { void collectStackRoots(TraceStack* stack) {
collectLocalStack(stack); collectLocalStack(stack);
collectOtherThreadsStacks(stack); collectOtherThreadsStacks(stack);
TraceStackGCVisitor visitor(stack);
gatherInterpreterRoots(&visitor);
} }
} }
} }
...@@ -24,6 +24,12 @@ namespace pyston { ...@@ -24,6 +24,12 @@ namespace pyston {
class PystonTestEnvironment : public testing::Environment { class PystonTestEnvironment : public testing::Environment {
void SetUp() override { void SetUp() override {
threading::registerMainThread(); threading::registerMainThread();
threading::acquireGLRead();
}
void TearDown() override {
threading::releaseGLRead();
} }
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment