Commit 1c1dcdb9 authored by Marius Wachtler's avatar Marius Wachtler

Allocate all ASTInterpreter instances on the stack and remove the interpreter map.

Not having the ASTInterpreter GC allocated improves performance.
I had to add a small asm function in order to produce a special stack frame where we can easily retrieve the ASTInterpreter*,
to replace s_interpreterMaps job. This also make sure that this function really does not get inlined.
The s_interpreterMap was hard to understand and produced several times problems (duplicate entries,...)

This patch contains a hack which limits the number of variables inside a function to 512.
Because we have to make sure the are all on the stack and can't dynamically add more space.
An upcoming patch will remove this limitation and replace it with a stack alloca of the size of the actual number of variables the function uses.
parent 5071e380
......@@ -34,6 +34,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS}
capi/object.cpp
capi/typeobject.cpp
codegen/ast_interpreter.cpp
codegen/ast_interpreter_exec.S
codegen/baseline_jit.cpp
codegen/codegen.cpp
codegen/compvars.cpp
......
......@@ -55,49 +55,27 @@ namespace pyston {
namespace {
static BoxedClass* astinterpreter_cls;
class ASTInterpreter;
// Map from stack frame pointers for frames corresponding to ASTInterpreter::execute() to the ASTInterpreter handling
// them. Used to look up information about that frame. This is used for getting tracebacks, for CPython introspection
// (sys._getframe & co), and for GC scanning.
static std::unordered_map<void*, ASTInterpreter*> s_interpreterMap;
static_assert(THREADING_USE_GIL, "have to make the interpreter map thread safe!");
class RegisterHelper {
private:
void* frame_addr;
ASTInterpreter* interpreter;
public:
RegisterHelper();
~RegisterHelper();
void doRegister(void* frame_addr, ASTInterpreter* interpreter);
static void deregister(void* frame_addr);
};
extern "C" Box* executeInnerAndSetupFrame(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at);
/*
* ASTInterpreters exist per function frame - there's no global interpreter object that executes
* all non-jitted code!
*
* The ASTInterpreter inherits from Box as part of garbage collection support.
* All ASTInterpreter instances have to live on the stack because otherwise the GC won't scan the fields.
*/
class ASTInterpreter : public Box {
class ASTInterpreter {
public:
typedef ContiguousMap<InternedString, Box*, llvm::SmallDenseMap<InternedString, int, 16>> SymMap;
typedef ContiguousMap<InternedString, Box*, llvm::SmallDenseMap<InternedString, int, 16>,
llvm::SmallVector<Box*, 512>> SymMap;
ASTInterpreter(CLFunction* clfunc);
void initArguments(int nargs, BoxedClosure* closure, BoxedGenerator* generator, Box* arg1, Box* arg2, Box* arg3,
Box** args);
static Value execute(ASTInterpreter& interpreter, CFGBlock* start_block = NULL, AST_stmt* start_at = NULL);
// This must not be inlined, because we rely on being able to detect when we're inside of it (by checking whether
// %rip is inside its instruction range) during a stack-trace in order to produce tracebacks inside interpreted
// code.
__attribute__((__no_inline__)) __attribute__((noinline)) static Value
executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at, RegisterHelper* reg);
static Box* execute(ASTInterpreter& interpreter, CFGBlock* start_block = NULL, AST_stmt* start_at = NULL);
static Box* executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at);
private:
Box* createFunction(AST* node, AST_arguments* args, const std::vector<AST_stmt*>& body);
......@@ -155,8 +133,7 @@ private:
void startJITing(CFGBlock* block, int exit_offset = 0);
void abortJITing();
void finishJITing(CFGBlock* continue_block = NULL);
// This method is not allowed to get inlined into 'executeInner' otherwise tracebacks are wrong.
__attribute__((__no_inline__)) __attribute__((noinline)) Box* execJITedBlock(CFGBlock* b);
Box* execJITedBlock(CFGBlock* b);
// this variables are used by the baseline JIT, make sure they have an offset < 0x80 so we can use shorter
// instructions
......@@ -174,15 +151,13 @@ private:
BoxedGenerator* generator;
unsigned edgecount;
FrameInfo frame_info;
BoxedModule* parent_module;
// This is either a module or a dict
Box* globals;
void* frame_addr; // used to clear entry inside the s_interpreterMap on destruction
std::unique_ptr<JitFragmentWriter> jit;
public:
DEFAULT_CLASS_SIMPLE(astinterpreter_cls);
AST_stmt* getCurrentStatement() {
assert(current_inst);
return current_inst;
......@@ -207,16 +182,6 @@ public:
void setFrameInfo(const FrameInfo* frame_info);
void setGlobals(Box* globals);
static void gcHandler(GCVisitor* visitor, Box* box);
static void simpleDestructor(Box* box) {
ASTInterpreter* inter = (ASTInterpreter*)box;
assert(inter->cls == astinterpreter_cls);
if (inter->frame_addr)
RegisterHelper::deregister(inter->frame_addr);
inter->~ASTInterpreter();
}
friend class RegisterHelper;
friend struct pyston::ASTInterpreterJitInterface;
};
......@@ -257,20 +222,6 @@ void ASTInterpreter::setGlobals(Box* globals) {
this->globals = globals;
}
void ASTInterpreter::gcHandler(GCVisitor* visitor, Box* box) {
boxGCHandler(visitor, box);
ASTInterpreter* interp = (ASTInterpreter*)box;
auto&& vec = interp->sym_table.vector();
visitor->visitRange((void* const*)&vec[0], (void* const*)&vec[vec.size()]);
visitor->visit(interp->passed_closure);
visitor->visit(interp->created_closure);
visitor->visit(interp->generator);
visitor->visit(interp->globals);
visitor->visit(interp->source_info->parent_module);
interp->frame_info.gcVisit(visitor);
}
ASTInterpreter::ASTInterpreter(CLFunction* clfunc)
: current_block(0),
current_inst(0),
......@@ -284,8 +235,8 @@ ASTInterpreter::ASTInterpreter(CLFunction* clfunc)
generator(0),
edgecount(0),
frame_info(ExcInfo(NULL, NULL, NULL)),
globals(0),
frame_addr(0) {
parent_module(source_info->parent_module),
globals(0) {
scope_info = source_info->getScopeInfo();
......@@ -325,31 +276,6 @@ void ASTInterpreter::initArguments(int nargs, BoxedClosure* _closure, BoxedGener
assert(nargs == i);
}
RegisterHelper::RegisterHelper() : frame_addr(NULL), interpreter(NULL) {
}
RegisterHelper::~RegisterHelper() {
assert(interpreter);
assert(interpreter->frame_addr == frame_addr);
interpreter->frame_addr = nullptr;
deregister(frame_addr);
}
void RegisterHelper::doRegister(void* frame_addr, ASTInterpreter* interpreter) {
assert(!this->interpreter);
assert(!this->frame_addr);
this->frame_addr = frame_addr;
this->interpreter = interpreter;
interpreter->frame_addr = frame_addr;
s_interpreterMap[frame_addr] = interpreter;
}
void RegisterHelper::deregister(void* frame_addr) {
assert(frame_addr);
assert(s_interpreterMap.count(frame_addr));
s_interpreterMap.erase(frame_addr);
}
void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) {
assert(ENABLE_BASELINEJIT);
assert(!jit);
......@@ -405,12 +331,7 @@ Box* ASTInterpreter::execJITedBlock(CFGBlock* b) {
return nullptr;
}
Value ASTInterpreter::executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at,
RegisterHelper* reg) {
void* frame_addr = __builtin_frame_address(0);
reg->doRegister(frame_addr, &interpreter);
Box* ASTInterpreter::executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at) {
Value v;
bool should_jit = false;
......@@ -462,7 +383,7 @@ Value ASTInterpreter::executeInner(ASTInterpreter& interpreter, CFGBlock* start_
Box* rtn = interpreter.execJITedBlock(b);
if (interpreter.next_block)
continue;
return Value(rtn, nullptr);
return rtn;
}
}
......@@ -478,10 +399,10 @@ Value ASTInterpreter::executeInner(ASTInterpreter& interpreter, CFGBlock* start_
v = interpreter.visit_stmt(s);
}
}
return v;
return v.o;
}
Value ASTInterpreter::execute(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at) {
Box* ASTInterpreter::execute(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at) {
UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_interpreter");
// Note: due to some (avoidable) restrictions, this check is pretty constrained in where
......@@ -494,8 +415,7 @@ Value ASTInterpreter::execute(ASTInterpreter& interpreter, CFGBlock* start_block
if (!interpreter.source_info->cfg)
interpreter.source_info->cfg = computeCFG(interpreter.source_info, interpreter.source_info->body);
RegisterHelper frame_registerer;
return executeInner(interpreter, start_block, start_at, &frame_registerer);
return executeInnerAndSetupFrame(interpreter, start_block, start_at);
}
Value ASTInterpreter::doBinOp(Value left, Value right, int op, BinExpType exp_type) {
......@@ -1410,13 +1330,13 @@ Value ASTInterpreter::visit_expr(AST_Expr* node) {
Value ASTInterpreter::visit_num(AST_Num* node) {
Box* o = NULL;
if (node->num_type == AST_Num::INT) {
o = source_info->parent_module->getIntConstant(node->n_int);
o = parent_module->getIntConstant(node->n_int);
} else if (node->num_type == AST_Num::FLOAT) {
o = source_info->parent_module->getFloatConstant(node->n_float);
o = parent_module->getFloatConstant(node->n_float);
} else if (node->num_type == AST_Num::LONG) {
o = source_info->parent_module->getLongConstant(node->n_long);
o = parent_module->getLongConstant(node->n_long);
} else if (node->num_type == AST_Num::COMPLEX) {
o = source_info->parent_module->getPureImaginaryConstant(node->n_float);
o = parent_module->getPureImaginaryConstant(node->n_float);
} else
RELEASE_ASSERT(0, "not implemented");
return Value(o, jit ? jit->imm(o) : NULL);
......@@ -1475,9 +1395,9 @@ Value ASTInterpreter::visit_set(AST_Set* node) {
Value ASTInterpreter::visit_str(AST_Str* node) {
Box* o = NULL;
if (node->str_type == AST_Str::STR) {
o = source_info->parent_module->getStringConstant(node->str_data);
o = parent_module->getStringConstant(node->str_data);
} else if (node->str_type == AST_Str::UNICODE) {
o = source_info->parent_module->getUnicodeConstant(node->str_data);
o = parent_module->getUnicodeConstant(node->str_data);
} else {
RELEASE_ASSERT(0, "%d", node->str_type);
}
......@@ -1659,6 +1579,7 @@ void ASTInterpreterJitInterface::setLocalClosureHelper(void* _interpreter, Inter
assert(gc::isValidGCObject(v));
interpreter->sym_table[id] = v;
RELEASE_ASSERT(interpreter->sym_table.size() < 512, "sym table to small");
interpreter->created_closure->elts[interpreter->scope_info->getClosureOffset(id)] = v;
}
......@@ -1668,10 +1589,15 @@ void ASTInterpreterJitInterface::setLocalHelper(void* _interpreter, InternedStri
assert(gc::isValidGCObject(v));
interpreter->sym_table[id] = v;
RELEASE_ASSERT(interpreter->sym_table.size() < 512, "sym table to small");
}
const void* interpreter_instr_addr = (void*)&executeInnerAndSetupFrame;
const void* interpreter_instr_addr = (void*)&ASTInterpreter::executeInner;
// small wrapper around executeInner because we can not directly call the member function from asm.
extern "C" Box* executeInnerFromASM(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at) {
return ASTInterpreter::executeInner(interpreter, start_block, start_at);
}
Box* astInterpretFunction(CLFunction* clfunc, int nargs, Box* closure, Box* generator, Box* globals, Box* arg1,
Box* arg2, Box* arg3, Box** args) {
......@@ -1725,43 +1651,41 @@ Box* astInterpretFunction(CLFunction* clfunc, int nargs, Box* closure, Box* gene
}
++clfunc->times_interpreted;
ASTInterpreter* interpreter = new ASTInterpreter(clfunc);
ASTInterpreter interpreter(clfunc);
ScopeInfo* scope_info = clfunc->source->getScopeInfo();
if (unlikely(scope_info->usesNameLookup())) {
interpreter->setBoxedLocals(new BoxedDict());
interpreter.setBoxedLocals(new BoxedDict());
}
assert((!globals) == clfunc->source->scoping->areGlobalsFromModule());
if (globals) {
interpreter->setGlobals(globals);
interpreter.setGlobals(globals);
} else {
interpreter->setGlobals(source_info->parent_module);
interpreter.setGlobals(source_info->parent_module);
}
interpreter->initArguments(nargs, (BoxedClosure*)closure, (BoxedGenerator*)generator, arg1, arg2, arg3, args);
Value v = ASTInterpreter::execute(*interpreter);
return v.o ? v.o : None;
interpreter.initArguments(nargs, (BoxedClosure*)closure, (BoxedGenerator*)generator, arg1, arg2, arg3, args);
Box* v = ASTInterpreter::execute(interpreter);
return v ? v : None;
}
Box* astInterpretFunctionEval(CLFunction* clfunc, Box* globals, Box* boxedLocals) {
++clfunc->times_interpreted;
ASTInterpreter* interpreter = new ASTInterpreter(clfunc);
interpreter->initArguments(0, NULL, NULL, NULL, NULL, NULL, NULL);
interpreter->setBoxedLocals(boxedLocals);
ASTInterpreter interpreter(clfunc);
interpreter.initArguments(0, NULL, NULL, NULL, NULL, NULL, NULL);
interpreter.setBoxedLocals(boxedLocals);
ScopeInfo* scope_info = clfunc->source->getScopeInfo();
SourceInfo* source_info = clfunc->source.get();
assert(!clfunc->source->scoping->areGlobalsFromModule());
assert(globals);
interpreter->setGlobals(globals);
Value v = ASTInterpreter::execute(*interpreter);
interpreter.setGlobals(globals);
return v.o ? v.o : None;
Box* v = ASTInterpreter::execute(interpreter);
return v ? v : None;
}
Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclosing_stmt, Box* expr_val,
......@@ -1772,29 +1696,29 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo
assert(after_expr);
assert(expr_val);
ASTInterpreter* interpreter = new ASTInterpreter(clfunc);
ASTInterpreter interpreter(clfunc);
ScopeInfo* scope_info = clfunc->source->getScopeInfo();
SourceInfo* source_info = clfunc->source.get();
assert(clfunc->source->scoping->areGlobalsFromModule());
interpreter->setGlobals(source_info->parent_module);
interpreter.setGlobals(source_info->parent_module);
for (const auto& p : frame_state.locals->d) {
assert(p.first->cls == str_cls);
auto name = static_cast<BoxedString*>(p.first)->s();
if (name == PASSED_GENERATOR_NAME) {
interpreter->setGenerator(p.second);
interpreter.setGenerator(p.second);
} else if (name == PASSED_CLOSURE_NAME) {
interpreter->setPassedClosure(p.second);
interpreter.setPassedClosure(p.second);
} else if (name == CREATED_CLOSURE_NAME) {
interpreter->setCreatedClosure(p.second);
interpreter.setCreatedClosure(p.second);
} else {
InternedString interned = clfunc->source->getInternedStrings().get(name);
interpreter->addSymbol(interned, p.second, false);
interpreter.addSymbol(interned, p.second, false);
}
}
interpreter->setFrameInfo(frame_state.frame_info);
interpreter.setFrameInfo(frame_state.frame_info);
CFGBlock* start_block = NULL;
AST_stmt* starting_statement = NULL;
......@@ -1806,7 +1730,7 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo
assert(asgn->targets[0]->type == AST_TYPE::Name);
auto name = ast_cast<AST_Name>(asgn->targets[0]);
assert(name->id.s()[0] == '#');
interpreter->addSymbol(name->id, expr_val, true);
interpreter.addSymbol(name->id, expr_val, true);
break;
} else if (enclosing_stmt->type == AST_TYPE::Expr) {
auto expr = ast_cast<AST_Expr>(enclosing_stmt);
......@@ -1844,37 +1768,42 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo
assert(starting_statement);
}
Value v = ASTInterpreter::execute(*interpreter, start_block, starting_statement);
Box* v = ASTInterpreter::execute(interpreter, start_block, starting_statement);
return v ? v : None;
}
return v.o ? v.o : None;
static ASTInterpreter* getInterpreterFromFramePtr(void* frame_ptr) {
// This offsets have to match the layout inside executeInnerAndSetupFrame
ASTInterpreter** ptr = (ASTInterpreter**)(((uint8_t*)frame_ptr) - 8);
return *ptr;
}
AST_stmt* getCurrentStatementForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = s_interpreterMap[frame_ptr];
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getCurrentStatement();
}
Box* getGlobalsForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = s_interpreterMap[frame_ptr];
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getGlobals();
}
CLFunction* getCLForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = s_interpreterMap[frame_ptr];
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getCL();
}
FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = s_interpreterMap[frame_ptr];
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getFrameInfo();
}
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible) {
ASTInterpreter* interpreter = s_interpreterMap[frame_ptr];
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
BoxedDict* rtn = new BoxedDict();
for (auto& l : interpreter->getSymbolTable()) {
......@@ -1888,16 +1817,8 @@ BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible) {
}
BoxedClosure* passedClosureForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = s_interpreterMap[frame_ptr];
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getPassedClosure();
}
void setupInterpreter() {
astinterpreter_cls = BoxedHeapClass::create(type_cls, object_cls, ASTInterpreter::gcHandler, 0, 0,
sizeof(ASTInterpreter), false, "astinterpreter");
astinterpreter_cls->tp_dealloc = ASTInterpreter::simpleDestructor;
astinterpreter_cls->has_safe_tp_dealloc = true;
astinterpreter_cls->freeze();
}
}
......@@ -70,7 +70,6 @@ struct Value {
Value(Box* o, RewriterVar* var) : o(o), var(var) {}
};
void setupInterpreter();
Box* astInterpretFunction(CLFunction* f, int nargs, Box* closure, Box* generator, Box* globals, Box* arg1, Box* arg2,
Box* arg3, Box** args);
Box* astInterpretFunctionEval(CLFunction* cf, Box* globals, Box* boxedLocals);
......
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This functions sets up a fixed stack frame which we use to detect ASTInterpreter frames
// and which makes it easy retrieve the passed ASTInterpreter pointer (stored at frame_ptr-8).
// It's written in ASM to make sure the stack layout keeps beeing the same and that nothing gets inlined.
// Our unwinder treats this function specialy.
// Box* executeInnerAndSetupFrame(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at)
.text
.globl executeInnerAndSetupFrame
.type executeInnerAndSetupFrame,@function
.align 16
executeInnerAndSetupFrame:
.cfi_startproc
push %rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp,-16
mov %rsp, %rbp
.cfi_def_cfa_register rbp
sub $16, %rsp
mov %rdi, -8(%rbp)
call executeInnerFromASM
leave
.cfi_def_cfa rsp,8
ret
.cfi_endproc
.size executeInnerAndSetupFrame,.-executeInnerAndSetupFrame
.section .note.GNU-stack,"",%progbits // we don't need executable stack
......@@ -21,9 +21,10 @@
namespace pyston {
template <class TKey, class TVal, class TMap = llvm::DenseMap<TKey, int>> class ContiguousMap {
template <class TKey, class TVal, class TMap = llvm::DenseMap<TKey, int>, class TVec = std::vector<TVal>>
class ContiguousMap {
typedef TMap map_type;
typedef std::vector<TVal> vec_type;
typedef TVec vec_type;
map_type map;
vec_type vec;
......
......@@ -3339,7 +3339,6 @@ void setupRuntime() {
closure_cls->freeze();
setupUnwinding();
setupInterpreter();
setupCAPI();
// Can't set up object methods until we set up CAPI support:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment