Commit 059ea8c2 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Support frame introspection

The approach in this changeset is to attach frame args
to every call site via stackmap args.  We make sure that
every callsite is a patchpoint, and serialize the symbol
table into its arguments.

In this patch, that is just used for supporting the locals()
method.

It's currently disabled since it has a hugely negative impact
on performance (LLVM takes much longer to JIT with all the extra
function arguments).
parent 06274389
......@@ -18,6 +18,7 @@
#include <unordered_set>
#include "asm_writing/types.h"
#include "codegen/stackmaps.h"
#include "core/ast.h"
namespace pyston {
......
......@@ -1111,6 +1111,72 @@ int RewriterVar::nvars = 0;
static const int INITIAL_CALL_SIZE = 13;
static const int DWARF_RBP_REGNUM = 6;
// Ensures a stackmap-reported frame value remains readable across the patchpoint's
// slowpath call.  If the location `l` is a call-clobbered register, this emits spill
// code at `inst_addr` (advancing it past the emitted instructions) that saves the
// register into the RBP-relative scratch area, rewrites `l` to the resulting
// Indirect location, and records the mapping in `remapped` so the same register is
// only spilled once per patchpoint.  Returns true iff spill code was emitted.
bool spillFrameArgumentIfNecessary(StackMap::Record::Location& l, uint8_t*& inst_addr, uint8_t* inst_end,
                                   int& scratch_offset, int& scratch_size, SpillMap& remapped) {
    switch (l.type) {
        // These location kinds don't live in volatile registers, so they are
        // already readable after the call; nothing to do.
        case StackMap::Record::Location::LocationType::Direct:
        case StackMap::Record::Location::LocationType::Indirect:
        case StackMap::Record::Location::LocationType::Constant:
        case StackMap::Record::Location::LocationType::ConstIndex:
            return false;
        case StackMap::Record::Location::LocationType::Register: {
            assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(l.regnum);

            // Callee-save registers survive the call unchanged.
            if (!Location(ru).isClobberedByCall())
                return false;

            // Already spilled this register earlier: just redirect `l` to the
            // existing spill slot instead of spilling again.
            auto it = remapped.find(ru);
            if (it != remapped.end()) {
                if (VERBOSITY()) {
                    printf("Already spilled ");
                    ru.dump();
                }
                l = it->second;
                return false;
            }

            if (VERBOSITY()) {
                printf("Spilling reg ");
                ru.dump();
            }

            // Emit a single mov of the register into [RBP + scratch_offset].
            assembler::Assembler assembler(inst_addr, inst_end - inst_addr);

            int bytes_pushed;
            if (ru.type == assembler::GenericRegister::GP) {
                auto dest = assembler::Indirect(assembler::RBP, scratch_offset);
                assembler.mov(ru.gp, dest);
                bytes_pushed = 8;
            } else if (ru.type == assembler::GenericRegister::XMM) {
                auto dest = assembler::Indirect(assembler::RBP, scratch_offset);
                assembler.movsd(ru.xmm, dest);
                bytes_pushed = 8;
            } else {
                abort();
            }

            assert(scratch_size >= bytes_pushed);
            assert(!assembler.hasFailed());

            // Advance the caller's write pointer past the spill code we emitted.
            uint8_t* cur_addr = assembler.curInstPointer();
            inst_addr = cur_addr;

            // Rewrite the location to the spill slot, and consume that slot so
            // the next spill gets fresh scratch space.
            l.type = StackMap::Record::Location::LocationType::Indirect;
            l.regnum = DWARF_RBP_REGNUM;
            l.offset = scratch_offset;
            scratch_offset += bytes_pushed;
            scratch_size -= bytes_pushed;
            remapped[ru] = l;

            return true;
        }
        default:
            abort();
    }
}
void* extractSlowpathFunc(uint8_t* pp_addr) {
#ifndef NDEBUG
// mov $imm, %r11:
......@@ -1135,12 +1201,13 @@ void* extractSlowpathFunc(uint8_t* pp_addr) {
std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs) {
const std::unordered_set<int>& live_outs, SpillMap& remapped) {
assert(start_addr < end_addr);
int est_slowpath_size = INITIAL_CALL_SIZE;
std::vector<assembler::GenericRegister> regs_to_spill;
std::vector<assembler::Register> regs_to_reload;
for (int dwarf_regnum : live_outs) {
assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(dwarf_regnum);
......@@ -1154,6 +1221,14 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t
// Location(ru).dump();
if (ru.type == assembler::GenericRegister::GP && remapped.count(ru)) {
// printf("already spilled!\n");
regs_to_reload.push_back(ru.gp);
est_slowpath_size += 7; // 7 bytes for a single mov
continue;
}
regs_to_spill.push_back(ru);
if (ru.type == assembler::GenericRegister::GP)
......@@ -1183,6 +1258,14 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t
uint8_t* rtn = assem.emitCall(slowpath_func, assembler::R11);
assem.emitBatchPop(scratch_offset, scratch_size, regs_to_spill);
for (assembler::Register r : regs_to_reload) {
StackMap::Record::Location& l = remapped[r];
assert(l.type == StackMap::Record::Location::LocationType::Indirect);
assert(l.regnum == DWARF_RBP_REGNUM);
assem.mov(assembler::Indirect(assembler::RBP, l.offset), r);
}
assem.fillWithNops();
assert(!assem.hasFailed());
......
......@@ -15,6 +15,7 @@
#ifndef PYSTON_ASMWRITING_REWRITER_H
#define PYSTON_ASMWRITING_REWRITER_H
#include <map>
#include <memory>
#include "asm_writing/assembler.h"
......@@ -315,10 +316,28 @@ public:
void* extractSlowpathFunc(uint8_t* pp_addr);
// Strict weak ordering over GenericRegisters so they can be used as std::map keys.
struct GRCompare {
    bool operator()(assembler::GenericRegister gr1, assembler::GenericRegister gr2) const {
        // Registers of different kinds order by their kind tag.
        if (gr1.type != gr2.type)
            return gr1.type < gr2.type;

        // Same kind: order by register number within that kind.
        switch (gr1.type) {
            case assembler::GenericRegister::GP:
                return gr1.gp.regnum < gr2.gp.regnum;
            case assembler::GenericRegister::XMM:
                return gr1.xmm.regnum < gr2.xmm.regnum;
            default:
                abort();
        }
    }
};
// Tracks which registers have already been spilled, and where they went.
typedef std::map<assembler::GenericRegister, StackMap::Record::Location, GRCompare> SpillMap;
// Spills the stackmap argument and guarantees that it will be readable by the unwinder.
// Updates the arguments if it did any spilling, and returns whether spilling happened.
bool spillFrameArgumentIfNecessary(StackMap::Record::Location& l, uint8_t*& inst_addr, uint8_t* inst_end,
int& scratch_offset, int& scratch_size, SpillMap& remapped);
// returns (start_of_slowpath, return_addr_of_slowpath_call)
std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs);
const std::unordered_set<int>& live_outs, SpillMap& remapped);
}
#endif
......@@ -34,6 +34,16 @@
namespace pyston {
// Default frame serialization for concrete types: the value is held in a single
// LLVM value, so it becomes exactly one stackmap argument.
void ConcreteCompilerType::serializeToFrame(VAR* var, std::vector<llvm::Value*>& stackmap_args) {
#ifndef NDEBUG
    // LLVM can't currently encode i1 values in stackmaps; dump the offending
    // value before asserting so the failure is debuggable.
    if (llvmType() == g.i1) {
        var->getValue()->dump();
        ASSERT(llvmType() != g.i1, "due to an llvm limitation cannot add these to stackmaps yet");
    }
#endif
    stackmap_args.push_back(var->getValue());
}
std::string ValuedCompilerType<llvm::Value*>::debugName() {
std::string rtn;
llvm::raw_string_ostream os(rtn);
......@@ -150,6 +160,18 @@ public:
}
return rtn;
}
// An instance method serializes as its two components: obj's args, then func's.
void serializeToFrame(VAR* var, std::vector<llvm::Value*>& stackmap_args) override {
    var->getValue()->obj->serializeToFrame(stackmap_args);
    var->getValue()->func->serializeToFrame(stackmap_args);
}

// Deserialization back to a bound-method Box is not implemented yet.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == numFrameArgs());
    abort();
}

// Total slots = obj's slots + function's slots.
int numFrameArgs() override { return obj_type->numFrameArgs() + function_type->numFrameArgs(); }
};
std::unordered_map<std::pair<CompilerType*, CompilerType*>, InstanceMethodType*> InstanceMethodType::made;
......@@ -353,6 +375,11 @@ public:
return new ConcreteCompilerVariable(UNKNOWN, rtn, true);
}
// UNKNOWN values are boxed objects, so the single frame slot is the Box* itself.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == 1);
    return reinterpret_cast<Box*>(vals[0]);
}
};
ConcreteCompilerType* UNKNOWN = new UnknownType();
......@@ -702,6 +729,13 @@ public:
}
static CompilerType* get(const std::vector<Sig*>& sigs) { return new AbstractFunctionType(sigs); }
// Frame introspection isn't supported for abstract function types yet.
// (Note: the assert below calls numFrameArgs(), which itself aborts.)
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == numFrameArgs());
    abort();
}

int numFrameArgs() override { abort(); }
};
class IntType : public ConcreteCompilerType {
......@@ -908,6 +942,12 @@ public:
}
virtual ConcreteCompilerType* getBoxType() { return BOXED_INT; }
// An unboxed int occupies one 64-bit slot; re-box it for the locals() dict.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == 1);
    return boxInt(vals[0]);
}
} _INT;
ConcreteCompilerType* INT = &_INT;
......@@ -1122,6 +1162,13 @@ public:
}
virtual ConcreteCompilerType* getBoxType() { return BOXED_FLOAT; }
// An unboxed float occupies one 64-bit slot holding the raw IEEE-754 bit pattern.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == 1);
    // NOTE(review): reinterpret_cast-based type punning; a memcpy into a double
    // would avoid potential strict-aliasing issues — confirm before changing.
    double d = *reinterpret_cast<const double*>(&vals[0]);
    return boxFloat(d);
}
} _FLOAT;
ConcreteCompilerType* FLOAT = &_FLOAT;
......@@ -1171,6 +1218,15 @@ public:
assert(is_well_defined);
return typeFromClass(cls);
}
// Known-classobj values consume zero frame slots (numFrameArgs() == 0);
// serialize/deserialize are unimplemented and abort if reached.
void serializeToFrame(VAR* var, std::vector<llvm::Value*>& stackmap_args) override { abort(); }

Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == numFrameArgs());
    abort();
}

int numFrameArgs() override { return 0; }
};
std::unordered_map<BoxedClass*, KnownClassobjType*> KnownClassobjType::made;
......@@ -1490,6 +1546,11 @@ public:
virtual BoxedClass* guaranteedClass() { return cls; }
virtual ConcreteCompilerType* getBoxType() { return this; }
// Normal objects are already boxed; the single frame slot is the Box*.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == 1);
    return reinterpret_cast<Box*>(vals[0]);
}
};
std::unordered_map<BoxedClass*, NormalObjectType*> NormalObjectType::made;
ConcreteCompilerType* STR, *BOXED_INT, *BOXED_FLOAT, *BOXED_BOOL, *NONE;
......@@ -1513,12 +1574,15 @@ public:
}
virtual ConcreteCompilerType* getConcreteType() { return this; }
// Shouldn't call this:
virtual ConcreteCompilerType* getBoxType() { RELEASE_ASSERT(0, ""); }
virtual ConcreteCompilerType* getBoxType() { return this; }
void drop(IREmitter& emitter, VAR* var) override {}
void grab(IREmitter& emitter, VAR* var) override {}
// A closure takes one frame slot, but materializing it as a Box is unimplemented.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == 1);
    abort();
}
} _CLOSURE;
ConcreteCompilerType* CLOSURE = &_CLOSURE;
......@@ -1536,6 +1600,11 @@ public:
virtual void grab(IREmitter& emitter, VAR* var) {
// pass
}
// Generator deserialization is unimplemented; aborts if reached.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == numFrameArgs());
    abort();
}
} _GENERATOR;
ConcreteCompilerType* GENERATOR = &_GENERATOR;
......@@ -1612,6 +1681,18 @@ public:
}
return rtn;
}
// A string constant serializes as an embedded pointer to its std::string.
void serializeToFrame(VAR* var, std::vector<llvm::Value*>& stackmap_args) override {
    stackmap_args.push_back(embedConstantPtr(var->getValue(), g.i8_ptr));
}

// The slot holds the std::string* embedded above; box it into a Python string.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == numFrameArgs());
    return boxStringPtr(reinterpret_cast<std::string*>(vals[0]));
}

int numFrameArgs() override { return 1; }
};
static ValuedCompilerType<const std::string*>* STR_CONSTANT = new StrConstantType();
......@@ -1622,6 +1703,8 @@ CompilerVariable* makeStr(const std::string* s) {
// The void type carries no value; deserializing one is a logic error.
class VoidType : public ConcreteCompilerType {
public:
    llvm::Type* llvmType() { return g.void_; }

    Box* deserializeFromFrame(const FrameVals& vals) override { abort(); }
};
ConcreteCompilerType* VOID = new VoidType();
......@@ -1630,17 +1713,14 @@ ConcreteCompilerType* typeFromClass(BoxedClass* c) {
return NormalObjectType::fromClass(c);
}
// Due to a temporary LLVM limitation, can represent bools as i64's instead of i1's.
// #define BOOLS_AS_I64
class BoolType : public ConcreteCompilerType {
public:
std::string debugName() { return "bool"; }
llvm::Type* llvmType() {
#ifdef BOOLS_AS_I64
return g.i64;
#else
return g.i1;
#endif
if (BOOLS_AS_I64)
return g.i64;
else
return g.i1;
}
virtual bool isFitBy(BoxedClass* c) { return false; }
......@@ -1704,6 +1784,13 @@ public:
}
virtual ConcreteCompilerType* getBoxType() { return BOXED_BOOL; }
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == 1);
    // Bools only reach frames in their i64 representation (BOOLS_AS_I64);
    // i1 values can't be put into stackmaps.
    assert(llvmType() == g.i64);
    bool b = (bool)vals[0];
    return boxBool(b);
}
};
ConcreteCompilerType* BOOL = new BoolType();
ConcreteCompilerVariable* makeBool(bool b) {
......@@ -1858,6 +1945,38 @@ public:
return makeConverted(emitter, var, getConcreteType())
->callattr(emitter, info, attr, clsonly, argspec, args, keyword_names);
}
// A tuple serializes element-wise, concatenating each element's frame args.
void serializeToFrame(VAR* var, std::vector<llvm::Value*>& stackmap_args) override {
    for (auto v : *var->getValue()) {
        v->serializeToFrame(stackmap_args);
    }
}

// Rebuilds a BoxedTuple by slicing `vals` according to each element type's
// frame-arg count and letting the element type deserialize its own slice.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == numFrameArgs());

    BoxedTuple::GCVector elts;
    int cur_idx = 0;
    for (auto e : elt_types) {
        int num_args = e->numFrameArgs();
        // TODO: inefficient to make these copies
        FrameVals sub_vals(vals.begin() + cur_idx, vals.begin() + cur_idx + num_args);

        elts.push_back(e->deserializeFromFrame(sub_vals));
        cur_idx += num_args;
    }
    // All slots must have been consumed exactly.
    assert(cur_idx == vals.size());

    return new BoxedTuple(std::move(elts));
}

// Total slots = sum over all element types.
int numFrameArgs() override {
    int rtn = 0;
    for (auto e : elt_types)
        rtn += e->numFrameArgs();
    return rtn;
}
};
CompilerType* makeTupleType(const std::vector<CompilerType*>& elt_types) {
......@@ -1941,6 +2060,11 @@ public:
virtual bool canConvertTo(ConcreteCompilerType* other_type) { return true; }
virtual BoxedClass* guaranteedClass() { return NULL; }
// Undefined values occupy one slot but can never be materialized as a Box.
Box* deserializeFromFrame(const FrameVals& vals) override {
    assert(vals.size() == 1);
    abort();
}
} _UNDEF;
CompilerType* UNDEF = &_UNDEF;
......@@ -1949,25 +2073,25 @@ ConcreteCompilerVariable* undefVariable() {
}
ConcreteCompilerVariable* boolFromI1(IREmitter& emitter, llvm::Value* v) {
#ifdef BOOLS_AS_I64
assert(v->getType() == g.i1);
assert(BOOL->llvmType() == g.i64);
llvm::Value* v2 = emitter.getBuilder()->CreateZExt(v, BOOL->llvmType());
return new ConcreteCompilerVariable(BOOL, v2, true);
#else
return new ConcreteCompilerVariable(BOOL, v, true);
#endif
if (BOOLS_AS_I64) {
assert(v->getType() == g.i1);
assert(BOOL->llvmType() == g.i64);
llvm::Value* v2 = emitter.getBuilder()->CreateZExt(v, BOOL->llvmType());
return new ConcreteCompilerVariable(BOOL, v2, true);
} else {
return new ConcreteCompilerVariable(BOOL, v, true);
}
}
llvm::Value* i1FromBool(IREmitter& emitter, ConcreteCompilerVariable* v) {
#ifdef BOOLS_AS_I64
assert(v->getType() == BOOL);
assert(BOOL->llvmType() == g.i64);
llvm::Value* v2 = emitter.getBuilder()->CreateTrunc(v->getValue(), g.i1);
return v2;
#else
return v->getValue();
#endif
if (BOOLS_AS_I64) {
assert(v->getType() == BOOL);
assert(BOOL->llvmType() == g.i64);
llvm::Value* v2 = emitter.getBuilder()->CreateTrunc(v->getValue(), g.i1);
return v2;
} else {
return v->getValue();
}
}
......
......@@ -18,6 +18,7 @@
#include <stdint.h>
#include <vector>
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
......@@ -31,6 +32,8 @@ class OpInfo;
class CompilerType;
class IREmitter;
typedef llvm::SmallVector<uint64_t, 1> FrameVals;
class CompilerType {
public:
virtual ~CompilerType() {}
......@@ -42,6 +45,8 @@ public:
virtual CompilerType* callType(ArgPassSpec argspec, const std::vector<CompilerType*>& arg_types,
const std::vector<const std::string*>* keyword_names) = 0;
virtual BoxedClass* guaranteedClass() = 0;
virtual Box* deserializeFromFrame(const FrameVals& vals) = 0;
virtual int numFrameArgs() = 0;
};
typedef std::unordered_map<CompilerVariable*, CompilerVariable*> DupCache;
......@@ -152,6 +157,7 @@ public:
ASSERT((CompilerType*)getConcreteType() != this, "%s", debugName().c_str());
return getConcreteType()->guaranteedClass();
}
virtual void serializeToFrame(VAR* v, std::vector<llvm::Value*>& stackmap_args) = 0;
};
template <class V> class ValuedCompilerType : public _ValuedCompilerType<V> { public: };
......@@ -180,6 +186,8 @@ public:
virtual bool canConvertTo(ConcreteCompilerType* other_type) { return other_type == this || other_type == UNKNOWN; }
virtual ConcreteCompilerVariable* makeConverted(IREmitter& emitter, ConcreteCompilerVariable* var,
ConcreteCompilerType* other_type);
void serializeToFrame(VAR* var, std::vector<llvm::Value*>& stackmap_args) override final;
int numFrameArgs() override final { return 1; }
};
class CompilerVariable {
......@@ -254,6 +262,8 @@ public:
virtual CompilerVariable* getitem(IREmitter& emitter, const OpInfo& info, CompilerVariable*) = 0;
virtual CompilerVariable* binexp(IREmitter& emitter, const OpInfo& info, CompilerVariable* rhs,
AST_TYPE::AST_TYPE op_type, BinExpType exp_type) = 0;
virtual void serializeToFrame(std::vector<llvm::Value*>& stackmap_args) = 0;
};
template <class V> class ValuedCompilerVariable : public CompilerVariable {
......@@ -343,6 +353,10 @@ public:
}
BoxedClass* guaranteedClass() override { return type->guaranteedClass(); }
void serializeToFrame(std::vector<llvm::Value*>& stackmap_args) override {
type->serializeToFrame(this, stackmap_args);
}
};
// template <>
......
......@@ -650,7 +650,37 @@ static void emitBBs(IRGenState* irstate, const char* bb_type, GuardList& out_gua
// Assert that the phi_st is empty, and just create the symbol table from the non-phi st:
ASSERT(phi_ending_symbol_tables[pred]->size() == 0, "%d %d", block->idx, pred->idx);
assert(ending_symbol_tables.count(pred));
generator->copySymbolsFrom(ending_symbol_tables[pred]);
// Filter out any names set by an invoke statement at the end
// of the previous block, if we're in the unwind path.
// This definitely doesn't seem like the most elegant way to do this,
// but the rest of the analysis frameworks can't (yet) support the idea of
// a block flowing differently to its different predecessors.
auto pred = block->predecessors[0];
auto last_inst = pred->body.back();
SymbolTable* sym_table = ending_symbol_tables[pred];
bool created_new_sym_table = false;
if (last_inst->type == AST_TYPE::Invoke) {
auto invoke = ast_cast<AST_Invoke>(last_inst);
if (invoke->exc_dest == block && invoke->stmt->type == AST_TYPE::Assign) {
auto asgn = ast_cast<AST_Assign>(invoke->stmt);
assert(asgn->targets.size() == 1);
if (asgn->targets[0]->type == AST_TYPE::Name) {
auto name = ast_cast<AST_Name>(asgn->targets[0]);
// TODO: inefficient
sym_table = new SymbolTable(*sym_table);
assert(sym_table->count(name->id));
sym_table->erase(name->id);
created_new_sym_table = true;
}
}
}
generator->copySymbolsFrom(sym_table);
if (created_new_sym_table)
delete sym_table;
} else {
// With multiple predecessors, the symbol tables at the end of each predecessor should be *exactly* the
// same.
......
......@@ -142,6 +142,8 @@ private:
pp_args.insert(pp_args.end(), ic_stackmap_args.begin(), ic_stackmap_args.end());
irgenerator->addFrameStackmapArgs(info, pp_args);
llvm::Intrinsic::ID intrinsic_id;
if (return_type->isIntegerTy() || return_type->isPointerTy()) {
intrinsic_id = llvm::Intrinsic::experimental_patchpoint_i64;
......@@ -181,8 +183,27 @@ public:
CompiledFunction* currentFunction() override { return irstate->getCurFunction(); }
llvm::Value* createCall(ExcInfo exc_info, llvm::Value* callee, const std::vector<llvm::Value*>& args) override {
llvm::CallSite cs = this->emitCall(exc_info, callee, args);
return cs.getInstruction();
if (ENABLE_FRAME_INTROSPECTION) {
llvm::Type* rtn_type = llvm::cast<llvm::FunctionType>(llvm::cast<llvm::PointerType>(callee->getType())
->getElementType())->getReturnType();
llvm::Value* bitcasted = getBuilder()->CreateBitCast(callee, g.i8->getPointerTo());
llvm::CallSite cs = emitPatchpoint(rtn_type, NULL, bitcasted, args, {}, exc_info);
if (rtn_type == cs->getType()) {
return cs.getInstruction();
} else if (rtn_type == g.i1) {
return getBuilder()->CreateTrunc(cs.getInstruction(), rtn_type);
} else if (llvm::isa<llvm::PointerType>(rtn_type)) {
return getBuilder()->CreateIntToPtr(cs.getInstruction(), rtn_type);
} else {
cs.getInstruction()->getType()->dump();
rtn_type->dump();
RELEASE_ASSERT(0, "don't know how to convert those");
}
} else {
return emitCall(exc_info, callee, args).getInstruction();
}
}
llvm::Value* createCall(ExcInfo exc_info, llvm::Value* callee, llvm::Value* arg1) override {
......@@ -2215,6 +2236,23 @@ private:
}
public:
// Appends the current symbol table's values to a patchpoint's stackmap args,
// and records (name, type) pairs on the PatchpointInfo so the unwinder can
// later map the stackmap locations back to Python variable names.
void addFrameStackmapArgs(PatchpointInfo* pp, std::vector<llvm::Value*>& stackmap_args) {
    int initial_args = stackmap_args.size();

    if (ENABLE_FRAME_INTROSPECTION) {
        // TODO: don't need to use a sorted symbol table if we're explicitly recording the names!
        // nice for debugging though.
        SortedSymbolTable sorted_symbol_table(symbol_table.begin(), symbol_table.end());

        for (const auto& p : sorted_symbol_table) {
            CompilerVariable* v = p.second;
            v->serializeToFrame(stackmap_args);
            pp->addFrameVar(p.first, v->getType());
        }
    }

    // Record how many args we appended (possibly zero when introspection is off).
    pp->setNumFrameArgs(stackmap_args.size() - initial_args);
}
EndingState getEndingSymbolTable() override {
assert(state == FINISHED || state == DEAD);
......
......@@ -34,6 +34,7 @@ namespace pyston {
class CFGBlock;
class GCBuilder;
class PatchpointInfo;
class ScopeInfo;
class TypeAnalysis;
......@@ -196,6 +197,7 @@ public:
virtual void run(const CFGBlock* block) = 0;
virtual EndingState getEndingSymbolTable() = 0;
virtual void doSafePoint() = 0;
virtual void addFrameStackmapArgs(PatchpointInfo* pp, std::vector<llvm::Value*>& stackmap_args) = 0;
};
class IREmitter;
......
......@@ -25,6 +25,7 @@
#include "llvm/IR/Module.h"
#include "codegen/codegen.h"
#include "codegen/compvars.h"
#include "codegen/irgen/hooks.h"
#include "codegen/irgen/util.h"
#include "codegen/patchpoints.h"
......@@ -32,6 +33,7 @@
#include "core/stats.h"
#include "core/thread_utils.h"
#include "core/util.h"
#include "runtime/types.h"
//#undef VERBOSITY
//#define VERBOSITY(x) 2
......@@ -230,6 +232,60 @@ void gatherInterpreterRoots(GCVisitor* visitor) {
visitor);
}
// Builds a locals() dict for a frame currently being run by the LLVM interpreter.
// Locates the patchpoint call the frame is stopped at, then evaluates each
// recorded frame variable's stackmap arguments against the interpreter's symbol
// map and deserializes them back into boxed objects.
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible) {
    llvm::Instruction* inst = cur_instruction_map[frame_ptr];
    assert(inst);

    // The interpreter's symbol map for the innermost frame:
    const SymMap* syms = root_stack_set.get()->back();
    assert(syms);

    ASSERT(llvm::isa<llvm::CallInst>(inst) || llvm::isa<llvm::InvokeInst>(inst),
           "trying to unwind from not within a patchpoint!");

    llvm::CallSite CS(inst);
    llvm::Function* f = CS.getCalledFunction();
    assert(startswith(f->getName(), "llvm.experimental.patchpoint."));

    // Patchpoint arg 0 is the id, which here encodes the PatchpointInfo*.
    llvm::Value* pp_arg = CS.getArgument(0);
    int64_t pp_id = llvm::cast<llvm::ConstantInt>(pp_arg)->getSExtValue();
    PatchpointInfo* pp = reinterpret_cast<PatchpointInfo*>(pp_id);

    llvm::DataLayout dl(inst->getParent()->getParent()->getParent());

    BoxedDict* rtn = new BoxedDict();

    // Stackmap args come after the 4 fixed patchpoint args plus the actual
    // call args (arg 3 is the call-arg count, per the patchpoint intrinsic).
    int stackmap_args_start = 4 + llvm::cast<llvm::ConstantInt>(CS.getArgument(3))->getZExtValue();
    assert(CS.arg_size() == stackmap_args_start + pp->totalStackmapArgs());

    // TODO: too much duplication here with other code
    int cur_arg = pp->frameStackmapArgsStart();
    for (const PatchpointInfo::FrameVarInfo& frame_var : pp->getFrameVars()) {
        int num_args = frame_var.type->numFrameArgs();

        // Names beginning with '!' or '#' are compiler-internal temporaries;
        // skip them (while still consuming their args) when filtering.
        if (only_user_visible && (frame_var.name[0] == '!' || frame_var.name[0] == '#')) {
            cur_arg += num_args;
            continue;
        }

        // Fetch the raw 64-bit value of each of this variable's stackmap args:
        llvm::SmallVector<uint64_t, 1> vals;
        for (int i = cur_arg, e = cur_arg + num_args; i < e; i++) {
            Val r = fetch(CS.getArgument(stackmap_args_start + i), dl, *syms);
            vals.push_back(r.n);
        }

        // Let the variable's type turn the raw values back into a boxed object:
        Box* b = frame_var.type->deserializeFromFrame(vals);
        ASSERT(gc::isValidGCObject(b), "%p", b);
        rtn->d[boxString(frame_var.name)] = b;

        cur_arg += num_args;
    }
    assert(cur_arg - pp->frameStackmapArgsStart() == pp->numFrameStackmapArgs());

    return rtn;
}
class UnregisterHelper {
private:
void* frame_ptr;
......
......@@ -22,6 +22,7 @@ class Function;
namespace pyston {
class Box;
class BoxedDict;
class GCVisitor;
class LineInfo;
......@@ -30,6 +31,7 @@ Box* interpretFunction(llvm::Function* f, int nargs, Box* closure, Box* generato
void gatherInterpreterRoots(GCVisitor* visitor);
const LineInfo* getLineInfoForInterpretedFrame(void* frame_ptr);
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible);
}
#endif
......@@ -28,11 +28,15 @@
namespace pyston {
// Records that the next numFrameArgs() stackmap args of this patchpoint
// belong to the variable `name` of the given type.
void PatchpointInfo::addFrameVar(const std::string& name, CompilerType* type) {
    frame_vars.push_back(FrameVarInfo({ .name = name, .type = type }));
}
int ICSetupInfo::totalSize() const {
int call_size = CALL_ONLY_SIZE;
if (getCallingConvention() != llvm::CallingConv::C) {
// 14 bytes per reg that needs to be spilled
call_size += 14 * 6;
call_size += 14 * 4;
}
return num_slots * slot_size + call_size;
}
......@@ -59,6 +63,33 @@ int PatchpointInfo::patchpointSize() {
return CALL_ONLY_SIZE;
}
// Splits this patchpoint's stackmap record into per-variable location lists and
// registers them in the function-wide LocationMap under each variable's name.
void PatchpointInfo::parseLocationMap(StackMap::Record* r, LocationMap* map) {
    assert(r->locations.size() == totalStackmapArgs());

    int cur_arg = frameStackmapArgsStart();
    // printf("parsing pp %ld:\n", reinterpret_cast<int64_t>(this));

    for (FrameVarInfo& frame_var : frame_vars) {
        int num_args = frame_var.type->numFrameArgs();

        // Each variable owns the next num_args locations of the record.
        llvm::SmallVector<StackMap::Record::Location, 1> locations;
        locations.append(&r->locations[cur_arg], &r->locations[cur_arg + num_args]);

        // printf("%s %d %d\n", frame_var.name.c_str(), r->locations[cur_arg].type, r->locations[cur_arg].regnum);

        // The entry covers the whole patchpoint region starting at r->offset.
        map->names[frame_var.name].locations.push_back(
            LocationMap::LocationTable::LocationEntry({ ._debug_pp_id = (uint64_t) this,
                                                        .offset = r->offset,
                                                        .length = patchpointSize(),
                                                        .type = frame_var.type,
                                                        .locations = std::move(locations) }));

        cur_arg += num_args;
    }
    assert(cur_arg - frameStackmapArgsStart() == numFrameStackmapArgs());
}
static int extractScratchOffset(PatchpointInfo* pp, StackMap::Record* r) {
StackMap::Record::Location l = r->locations[pp->scratchStackmapArg()];
......@@ -95,8 +126,22 @@ static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::Callin
}
void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
// FIXME: this is pretty hacky, that we don't delete the patchpoints here.
// We need them currently for the llvm interpreter.
// Eventually we'll get rid of that and use an AST interpreter, and then we won't need this hack.
if (cf->effort == EffortLevel::INTERPRETED) {
assert(!stackmap);
new_patchpoints.clear();
return;
}
int nrecords = stackmap ? stackmap->records.size() : 0;
assert(cf->location_map == NULL);
cf->location_map = new LocationMap();
if (stackmap)
cf->location_map->constants = stackmap->constants;
for (int i = 0; i < nrecords; i++) {
StackMap::Record* r = stackmap->records[i];
......@@ -126,12 +171,36 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
//*start_addr = 0xcc;
// start_addr++;
int nspills = 0;
SpillMap frame_remapped;
// TODO: if we pass the same llvm::Value as the stackmap args, we will get the same reg.
// we shouldn't then spill that multiple times.
// we could either deal with it here, or not put the same Value into the patchpoint
for (int j = pp->frameStackmapArgsStart(), e = j + pp->numFrameStackmapArgs(); j < e; j++) {
StackMap::Record::Location& l = r->locations[j];
// updates l, start_addr, scratch_rbp_offset, scratch_size:
bool spilled = spillFrameArgumentIfNecessary(l, start_addr, end_addr, scratch_rbp_offset, scratch_size,
frame_remapped);
if (spilled)
nspills++;
}
ASSERT(nspills <= MAX_FRAME_SPILLS, "did %d spills but expected only %d!", nspills, MAX_FRAME_SPILLS);
assert(scratch_size % sizeof(void*) == 0);
assert(scratch_rbp_offset % sizeof(void*) == 0);
// TODO: is something like this necessary?
// llvm::sys::Memory::InvalidateInstructionCache(start, getSlotSize());
pp->parseLocationMap(r, cf->location_map);
const ICSetupInfo* ic = pp->getICInfo();
if (ic == NULL) {
// We have to be using the C calling convention here, so we don't need to check the live outs
// or save them across the call.
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size,
std::unordered_set<int>());
std::unordered_set<int>(), frame_remapped);
continue;
}
......@@ -151,8 +220,8 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
auto _p
= initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size, live_outs);
auto _p = initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size,
live_outs, frame_remapped);
uint8_t* slowpath_start = _p.first;
uint8_t* slowpath_rtn_addr = _p.second;
......
......@@ -31,18 +31,32 @@ struct StackMap;
class TypeRecorder;
class ICSetupInfo;
static const int CALL_ONLY_SIZE = 13;
static const int MAX_FRAME_SPILLS = 9; // TODO this shouldn't have to be larger than the set of non-callee-save args (9)
// except that we will currently spill the same reg multiple times
static const int CALL_ONLY_SIZE
= 13 + (MAX_FRAME_SPILLS * 9)
+ 1; // 13 for the call, 9 bytes per spill (7 for GP, 9 for XMM), + 1 if we want to nop/trap
void processStackmap(CompiledFunction* cf, StackMap* stackmap);
struct PatchpointInfo {
public:
struct FrameVarInfo {
std::string name;
CompilerType* type;
};
private:
CompiledFunction* const parent_cf;
const ICSetupInfo* icinfo;
int num_ic_stackmap_args;
int num_frame_stackmap_args;
std::vector<FrameVarInfo> frame_vars;
PatchpointInfo(CompiledFunction* parent_cf, const ICSetupInfo* icinfo, int num_ic_stackmap_args)
: parent_cf(parent_cf), icinfo(icinfo), num_ic_stackmap_args(num_ic_stackmap_args) {}
: parent_cf(parent_cf), icinfo(icinfo), num_ic_stackmap_args(num_ic_stackmap_args),
num_frame_stackmap_args(-1) {}
public:
const ICSetupInfo* getICInfo() { return icinfo; }
......@@ -50,13 +64,29 @@ public:
int patchpointSize();
CompiledFunction* parentFunction() { return parent_cf; }
const std::vector<FrameVarInfo>& getFrameVars() { return frame_vars; }
int scratchStackmapArg() { return 0; }
int scratchSize() { return 80; }
int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); }
void addFrameVar(const std::string& name, CompilerType* type);
void setNumFrameArgs(int num_frame_args) {
assert(num_frame_stackmap_args == -1);
num_frame_stackmap_args = num_frame_args;
}
int icStackmapArgsStart() { return 1; }
int numICStackmapArgs() { return num_ic_stackmap_args; }
int totalStackmapArgs() { return icStackmapArgsStart() + numICStackmapArgs(); }
int frameStackmapArgsStart() { return icStackmapArgsStart() + numICStackmapArgs(); }
int numFrameStackmapArgs() {
assert(num_frame_stackmap_args >= 0);
return num_frame_stackmap_args;
}
void parseLocationMap(StackMap::Record* r, LocationMap* map);
int totalStackmapArgs() { return frameStackmapArgsStart() + numFrameStackmapArgs(); }
static PatchpointInfo* create(CompiledFunction* parent_cf, const ICSetupInfo* icinfo, int num_ic_stackmap_args);
};
......
......@@ -16,14 +16,19 @@
#define PYSTON_CODEGEN_STACKMAPS_H
#include <cstdint>
#include <unordered_map>
#include <vector>
#include "llvm/ADT/SmallVector.h"
namespace llvm {
class JITEventListener;
}
namespace pyston {
class CompilerType;
struct StackMap {
struct __attribute__((__packed__)) StackSizeRecord {
uint64_t offset;
......@@ -64,6 +69,25 @@ struct StackMap {
std::vector<Record*> records;
};
// TODO this belongs somewhere else?
// Per-compiled-function index mapping variable names to the machine locations
// that hold their values at each patchpoint.  Filled in by
// PatchpointInfo::parseLocationMap and consumed during frame introspection.
class LocationMap {
public:
    // Constant pool from the LLVM stackmap section (indexed by ConstIndex locations).
    std::vector<uint64_t> constants;

    struct LocationTable {
        struct LocationEntry {
            uint64_t _debug_pp_id; // originating PatchpointInfo*, kept for debugging
            unsigned offset;       // code offset at which this entry becomes valid
            int length;            // number of bytes for which it stays valid
            CompilerType* type;    // type used to deserialize the raw slot values
            llvm::SmallVector<StackMap::Record::Location, 1> locations;
        };

        std::vector<LocationEntry> locations;
    };

    std::unordered_map<std::string, LocationTable> names;
};
StackMap* parseStackMap();
llvm::JITEventListener* makeStackMapListener();
}
......
......@@ -27,6 +27,8 @@
#include "codegen/compvars.h"
#include "codegen/irgen/hooks.h"
#include "codegen/llvm_interpreter.h"
#include "codegen/stackmaps.h"
#include "runtime/types.h"
#define UNW_LOCAL_ONLY
......@@ -333,7 +335,7 @@ public:
return *this;
}
const PythonFrameIterator& operator*() const {
PythonFrameIterator& operator*() const {
assert(it.get());
return *it.get();
}
......@@ -397,6 +399,71 @@ BoxedModule* getCurrentModule() {
return getTopCompiledFunction()->clfunc->source->parent_module;
}
// Collects local variables from the current Python call stack into a dict.
// For compiled frames, reads the function's LocationMap at the frame's current
// instruction offset and deserializes each live variable; an interpreted frame
// delegates to localsForInterpretedFrame.  When only_user_visible is set,
// names beginning with '#' or '!' (compiler-internal) are skipped.
// NOTE(review): hitting an interpreted frame returns immediately, discarding
// anything already collected from compiled frames, while compiled frames keep
// accumulating across the whole stack — confirm the intended frame scope.
BoxedDict* getLocals(bool only_user_visible) {
    BoxedDict* d = new BoxedDict();

    for (PythonFrameIterator& frame_info : unwindPythonFrames()) {
        if (frame_info.getId().type == PythonFrameId::COMPILED) {
            CompiledFunction* cf = frame_info.getCF();
            uint64_t ip = frame_info.getId().ip;

            assert(ip > cf->code_start);
            unsigned offset = ip - cf->code_start;

            assert(cf->location_map);
            for (const auto& p : cf->location_map->names) {
                if (only_user_visible && (p.first[0] == '#' || p.first[0] == '!'))
                    continue;

                for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
                    // Only entries whose range covers the current offset apply.
                    if (e.offset < offset && offset <= e.offset + e.length) {
                        const auto& locs = e.locations;

                        llvm::SmallVector<uint64_t, 1> vals;
                        // printf("%s: %s\n", p.first.c_str(), e.type->debugName().c_str());

                        // Read each raw 64-bit value from wherever the stackmap
                        // recorded it: register, memory, or constant.
                        for (auto& loc : locs) {
                            uint64_t n;
                            // printf("%d %d %d %d\n", loc.type, loc.flags, loc.regnum, loc.offset);
                            if (loc.type == StackMap::Record::Location::LocationType::Register) {
                                // TODO: need to make sure we deal with patchpoints appropriately
                                n = frame_info.getReg(loc.regnum);
                            } else if (loc.type == StackMap::Record::Location::LocationType::Indirect) {
                                // Value lives in memory at [reg + offset].
                                uint64_t reg_val = frame_info.getReg(loc.regnum);
                                uint64_t addr = reg_val + loc.offset;
                                n = *reinterpret_cast<uint64_t*>(addr);
                            } else if (loc.type == StackMap::Record::Location::LocationType::Constant) {
                                n = loc.offset;
                            } else if (loc.type == StackMap::Record::Location::LocationType::ConstIndex) {
                                // Index into the stackmap's constant pool.
                                int const_idx = loc.offset;
                                assert(const_idx >= 0);
                                assert(const_idx < cf->location_map->constants.size());
                                n = cf->location_map->constants[const_idx];
                            } else {
                                printf("%d %d %d %d\n", loc.type, loc.flags, loc.regnum, loc.offset);
                                abort();
                            }
                            vals.push_back(n);
                        }

                        // Re-box the raw values using the variable's recorded type:
                        Box* v = e.type->deserializeFromFrame(vals);
                        // printf("%s: (pp id %ld) %p\n", p.first.c_str(), e._debug_pp_id, v);
                        assert(gc::isValidGCObject(v));
                        d->d[boxString(p.first)] = v;
                    }
                }
            }
        } else if (frame_info.getId().type == PythonFrameId::INTERPRETED) {
            return localsForInterpretedFrame((void*)frame_info.getId().bp, only_user_visible);
        } else {
            abort();
        }
    }

    return d;
}
llvm::JITEventListener* makeTracebacksListener() {
return new TracebacksEventListener();
......
......@@ -55,4 +55,7 @@ bool ENABLE_INLINING = 1 && _GLOBAL_ENABLE;
bool ENABLE_REOPT = 1 && _GLOBAL_ENABLE;
bool ENABLE_PYSTON_PASSES = 1 && _GLOBAL_ENABLE;
bool ENABLE_TYPE_FEEDBACK = 1 && _GLOBAL_ENABLE;
bool ENABLE_FRAME_INTROSPECTION = 0;
bool BOOLS_AS_I64 = ENABLE_FRAME_INTROSPECTION;
}
......@@ -35,7 +35,10 @@ extern bool SHOW_DISASM, FORCE_OPTIMIZE, BENCH, PROFILE, DUMPJIT, TRAP, USE_STRI
extern bool ENABLE_ICS, ENABLE_ICGENERICS, ENABLE_ICGETITEMS, ENABLE_ICSETITEMS, ENABLE_ICDELITEMS, ENABLE_ICBINEXPS,
ENABLE_ICNONZEROS, ENABLE_ICCALLSITES, ENABLE_ICSETATTRS, ENABLE_ICGETATTRS, ENALBE_ICDELATTRS, ENABLE_ICGETGLOBALS,
ENABLE_SPECULATION, ENABLE_OSR, ENABLE_LLVMOPTS, ENABLE_INLINING, ENABLE_REOPT, ENABLE_PYSTON_PASSES,
ENABLE_TYPE_FEEDBACK;
ENABLE_TYPE_FEEDBACK, ENABLE_FRAME_INTROSPECTION;
// Due to a temporary LLVM limitation, represent bools as i64's instead of i1's.
extern bool BOOLS_AS_I64;
}
}
......
......@@ -159,6 +159,7 @@ class BoxedClosure;
class BoxedGenerator;
class LineTable;
class ICInfo;
class LocationMap;
struct CompiledFunction {
private:
......@@ -188,13 +189,16 @@ public:
// Unfortunately, can't make this a std::unique_ptr if we want to forward-declare LineTable:
LineTable* line_table;
LocationMap* location_map; // only meaningful if this is a compiled frame
std::vector<ICInfo*> ics;
CompiledFunction(llvm::Function* func, FunctionSpecialization* spec, bool is_interpreted, void* code,
llvm::Value* llvm_code, EffortLevel::EffortLevel effort,
const OSREntryDescriptor* entry_descriptor)
: clfunc(NULL), func(func), spec(spec), entry_descriptor(entry_descriptor), is_interpreted(is_interpreted),
code(code), llvm_code(llvm_code), effort(effort), times_called(0), line_table(nullptr) {}
code(code), llvm_code(llvm_code), effort(effort), times_called(0), line_table(nullptr),
location_map(nullptr) {}
// TODO this will need to be implemented eventually, and delete line_table if it exists
~CompiledFunction();
......
......@@ -20,6 +20,7 @@
#include "codegen/irgen/hooks.h"
#include "codegen/parser.h"
#include "codegen/unwinding.h"
#include "core/ast.h"
#include "core/types.h"
#include "gc/collector.h"
......@@ -573,6 +574,10 @@ Box* globals() {
return makeAttrWrapper(m);
}
// builtin locals(): expose only the user-visible names of the current frame,
// filtering out compiler-internal temporaries.
Box* locals() {
    const bool only_user_visible = true;
    return getLocals(only_user_visible);
}
Box* divmod(Box* lhs, Box* rhs) {
return binopInternal(lhs, rhs, AST_TYPE::DivMod, false, NULL);
}
......@@ -788,6 +793,7 @@ void setupBuiltins() {
builtins_module->giveAttr("open", open_obj);
builtins_module->giveAttr("globals", new BoxedFunction(boxRTFunction((void*)globals, UNKNOWN, 0, 0, false, false)));
builtins_module->giveAttr("locals", new BoxedFunction(boxRTFunction((void*)locals, UNKNOWN, 0, 0, false, false)));
builtins_module->giveAttr("iter", new BoxedFunction(boxRTFunction((void*)getiter, UNKNOWN, 1, 0, false, false)));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment