Commit 7863a419 authored by Kevin Modzelewski

Change generator stack allocation

Previously, we just included a char[] buffer in the generator object,
and used that as the stack.

Now, mmap() a dedicated stack segment for it.  Use MAP_GROWSDOWN so that
the stack is automatically expandable, but add a redzone 4MB down to
limit the maximum stack size.
parent 6a3e5219
...@@ -132,6 +132,8 @@ static std::unordered_map<pthread_t, ThreadStateInternal*> current_threads; ...@@ -132,6 +132,8 @@ static std::unordered_map<pthread_t, ThreadStateInternal*> current_threads;
// TODO could optimize these by keeping a __thread local reference to current_threads[pthread_self()] // TODO could optimize these by keeping a __thread local reference to current_threads[pthread_self()]
// Forwards (g, new_stack_start, old_stack_limit) to pushGenerator() on the ThreadStateInternal
// that current_threads holds for the calling thread (keyed by pthread_self()).
// Both stack pointers are asserted to be non-null before forwarding.
void pushGenerator(BoxedGenerator* g, void* new_stack_start, void* old_stack_limit) { void pushGenerator(BoxedGenerator* g, void* new_stack_start, void* old_stack_limit) {
assert(new_stack_start);
assert(old_stack_limit);
current_threads[pthread_self()]->pushGenerator(g, new_stack_start, old_stack_limit); current_threads[pthread_self()]->pushGenerator(g, new_stack_start, old_stack_limit);
} }
......
...@@ -74,7 +74,7 @@ public: ...@@ -74,7 +74,7 @@ public:
assert(size % PAGE_SIZE == 0); assert(size % PAGE_SIZE == 0);
// printf("mmap %ld\n", size); // printf("mmap %ld\n", size);
void* mrtn = mmap(cur, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); void* mrtn = mmap(cur, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert((uintptr_t)mrtn != -1 && "failed to allocate memory from OS"); assert((uintptr_t)mrtn != -1 && "failed to allocate memory from OS");
ASSERT(mrtn == cur, "%p %p\n", mrtn, cur); ASSERT(mrtn == cur, "%p %p\n", mrtn, cur);
cur = (uint8_t*)cur + size; cur = (uint8_t*)cur + size;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <algorithm> #include <algorithm>
#include <cstddef> #include <cstddef>
#include <cstring> #include <cstring>
#include <sys/mman.h>
#include <ucontext.h> #include <ucontext.h>
#include "core/ast.h" #include "core/ast.h"
...@@ -30,12 +31,21 @@ ...@@ -30,12 +31,21 @@
namespace pyston { namespace pyston {
// Low address of the next generator stack region to hand out. The BoxedGenerator constructor
// uses [next_stack_addr, next_stack_addr + MAX_STACK_SIZE) and then advances this by MAX_STACK_SIZE.
static uint64_t next_stack_addr = 0x3270000000L;
// NOTE(review): not referenced in the visible hunks -- presumably intended to hold stack
// addresses that can be reused; confirm against the rest of the file.
static std::vector<uint64_t> available_addrs;
// There should be a better way of getting this:
#define PAGE_SIZE 4096
// Number of bytes mmap()'d up front at the high end of each generator stack region.
#define INITIAL_STACK_SIZE (8 * PAGE_SIZE)
// Size of the PROT_NONE mapping placed at the low end of each generator stack region.
#define STACK_REDZONE_SIZE PAGE_SIZE
// Address-space span set aside per generator stack (distance between consecutive stack regions).
#define MAX_STACK_SIZE (4 * 1024 * 1024)
static void generatorEntry(BoxedGenerator* g) { static void generatorEntry(BoxedGenerator* g) {
assert(g->cls == generator_cls); assert(g->cls == generator_cls);
assert(g->function->cls == function_cls); assert(g->function->cls == function_cls);
threading::pushGenerator(g, g->stack + BoxedGenerator::STACK_SIZE,
(void*)g->returnContext.uc_mcontext.gregs[REG_RSP]); threading::pushGenerator(g, g->stack_begin, (void*)g->returnContext.uc_mcontext.gregs[REG_RSP]);
try { try {
// call body of the generator // call body of the generator
...@@ -115,8 +125,7 @@ extern "C" Box* yield(BoxedGenerator* obj, Box* value) { ...@@ -115,8 +125,7 @@ extern "C" Box* yield(BoxedGenerator* obj, Box* value) {
threading::popGenerator(); threading::popGenerator();
swapcontext(&self->context, &self->returnContext); swapcontext(&self->context, &self->returnContext);
threading::pushGenerator(obj, obj->stack + BoxedGenerator::STACK_SIZE, threading::pushGenerator(obj, obj->stack_begin, (void*)obj->returnContext.uc_mcontext.gregs[REG_RSP]);
(void*)obj->returnContext.uc_mcontext.gregs[REG_RSP]);
// if the generator receives a exception from the caller we have to throw it // if the generator receives a exception from the caller we have to throw it
if (self->exception) { if (self->exception) {
...@@ -150,8 +159,40 @@ extern "C" BoxedGenerator::BoxedGenerator(BoxedFunction* function, Box* arg1, Bo ...@@ -150,8 +159,40 @@ extern "C" BoxedGenerator::BoxedGenerator(BoxedFunction* function, Box* arg1, Bo
getcontext(&context); getcontext(&context);
context.uc_link = 0; context.uc_link = 0;
context.uc_stack.ss_sp = stack;
context.uc_stack.ss_size = STACK_SIZE; uint64_t stack_low = next_stack_addr;
uint64_t stack_high = stack_low + MAX_STACK_SIZE;
next_stack_addr = stack_high;
#if STACK_GROWS_DOWN
this->stack_begin = (void*)stack_high;
void* initial_stack_limit = (void*)(stack_high - INITIAL_STACK_SIZE);
void* p = mmap(initial_stack_limit, INITIAL_STACK_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0);
assert(p == initial_stack_limit);
context.uc_stack.ss_sp = initial_stack_limit;
context.uc_stack.ss_size = INITIAL_STACK_SIZE;
// Create an inaccessible redzone so that the generator stack won't grow indefinitely.
// Looks like it throws a SIGBUS if we reach the redzone; it's unclear if that's better
// or worse than being able to consume all available memory.
void* p2 = mmap((void*)stack_low, STACK_REDZONE_SIZE, PROT_NONE, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
assert(p2 == (void*)stack_low);
// Interestingly, it seems like MAP_GROWSDOWN will leave a page-size gap between the redzone and the growable
// region.
if (VERBOSITY() >= 1) {
printf("Created new generator stack, starts at %p, currently extends to %p\n", (void*)stack_high,
initial_stack_limit);
printf("Created a redzone from %p-%p\n", (void*)stack_low, (void*)(stack_low + STACK_REDZONE_SIZE));
}
#else
#error "implement me"
#endif
makecontext(&context, (void (*)(void))generatorEntry, 1, this); makecontext(&context, (void (*)(void))generatorEntry, 1, this);
} }
...@@ -183,7 +224,7 @@ extern "C" void generatorGCHandler(GCVisitor* v, Box* b) { ...@@ -183,7 +224,7 @@ extern "C" void generatorGCHandler(GCVisitor* v, Box* b) {
v->visitPotentialRange((void**)&g->context, ((void**)&g->context) + sizeof(g->context) / sizeof(void*)); v->visitPotentialRange((void**)&g->context, ((void**)&g->context) + sizeof(g->context) / sizeof(void*));
#if STACK_GROWS_DOWN #if STACK_GROWS_DOWN
v->visitPotentialRange((void**)g->context.uc_mcontext.gregs[REG_RSP], (void**)g->stack + BoxedGenerator::STACK_SIZE); v->visitPotentialRange((void**)g->context.uc_mcontext.gregs[REG_RSP], (void**)g->stack_begin);
#endif #endif
} }
} }
......
...@@ -516,8 +516,6 @@ public: ...@@ -516,8 +516,6 @@ public:
class BoxedGenerator : public Box { class BoxedGenerator : public Box {
public: public:
enum { STACK_SIZE = SIGSTKSZ * 5 };
HCAttrs attrs; HCAttrs attrs;
BoxedFunction* function; BoxedFunction* function;
Box* arg1, *arg2, *arg3; Box* arg1, *arg2, *arg3;
...@@ -529,7 +527,7 @@ public: ...@@ -529,7 +527,7 @@ public:
Box* exception; Box* exception;
ucontext_t context, returnContext; ucontext_t context, returnContext;
char stack[STACK_SIZE]; void* stack_begin;
BoxedGenerator(BoxedFunction* function, Box* arg1, Box* arg2, Box* arg3, Box** args); BoxedGenerator(BoxedFunction* function, Box* arg1, Box* arg2, Box* arg3, Box** args);
}; };
......
# Make sure we can recurse at least 900 times on the three different types
# of stacks that we have:
def recurse(n):
if n > 0:
return recurse(n - 1)
return n
print "Recursing on main thread..."
recurse(900)
print "Recursing in a generator..."
def gen():
yield recurse(900)
print list(gen())
print "Recursing in a thread..."
from thread import start_new_thread
import time
done = 0
def thread_target():
global done
recurse(900)
done = 1
start_new_thread(thread_target, ())
while not done:
time.sleep(0.001)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment