Commit c83f33a3 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Use patchpoints to skip decref jit overhead

Represent decref/xdecref operations as opaque function calls.
I think the ideal solution would be to add a custom llvm intrinsic,
but I spent a small amount of time looking into that and had trouble
figuring out how to do that.

So instead, just emit them as patchpoints, and then patch them afterwards
with a fixed code sequence.

This commit only does this for decref/xdecref because:
- they occur much more frequently
- they are much more expensive to jit since they involve control flow
- forcing the op to fit a C-calling-convention isn't that much overhead,
  since the register allocator probably would have done that anyway due
  to the (potential) dealloc call.
parent 31a03a8d
......@@ -267,8 +267,16 @@ void addDecrefs(llvm::Value* v, bool nullable, int num_refs, llvm::Instruction*
return;
}
assert(!nullable);
RELEASE_ASSERT(num_refs == 1, "decref patchpoints don't support >1 refs");
llvm::Function* patchpoint
= llvm::Intrinsic::getDeclaration(g.cur_module, llvm::Intrinsic::experimental_patchpoint_void);
int pp_id = nullable ? XDECREF_PP_ID : DECREF_PP_ID;
int pp_size = nullable ? XDECREF_PP_SIZE : DECREF_PP_SIZE;
builder.CreateCall(patchpoint, { getConstantInt(pp_id, g.i64), getConstantInt(pp_size, g.i32), getNullPtr(g.i8_ptr),
getConstantInt(1, g.i32), v });
#if 0
// Deal with subtypes of Box:
while (v->getType() != g.llvm_value_type_ptr) {
v = builder.CreateConstInBoundsGEP2_32(v, 0, 0);
......@@ -330,6 +338,7 @@ void addDecrefs(llvm::Value* v, bool nullable, int num_refs, llvm::Instruction*
builder.CreateBr(continue_block);
builder.SetInsertPoint(continue_block);
#endif
}
void addCXXFixup(llvm::Instruction* inst, const llvm::SmallVector<llvm::TrackingVH<llvm::Value>, 4>& to_decref,
......
......@@ -215,6 +215,10 @@ template <typename I> void remapPatchpoint(I* ii) {
pp_id = l_pp_id->getSExtValue();
} else if (i == 2) {
assert(pp_id != -1);
if (pp_id == DECREF_PP_ID || pp_id == XDECREF_PP_ID)
continue;
void* addr = PatchpointInfo::getSlowpathAddr(pp_id);
bool lookup_success = true;
......
......@@ -26,6 +26,7 @@
#include "core/options.h"
#include "core/stats.h"
#include "core/types.h"
#include "runtime/types.h"
namespace pyston {
......@@ -135,6 +136,70 @@ static LiveOutSet extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc)
return live_outs;
}
#ifdef Py_TRACE_REFS
#error "trace_refs not supported yet"
#else
#ifndef Py_REF_DEBUG
// x86-64 machine-code template that is copied over each decref patchpoint (12 bytes of code).
// The object pointer is read from %rdi. The jne displacement of 7 skips exactly the
// mov (4 bytes) and callq (3 bytes) that follow it, i.e. it lands at the end of the template.
// NOTE(review): offsets 0x0, 0x8 and 0x30 are presumably the refcount field, the type pointer
// and the type's dealloc slot -- confirm against the object/type layout.
// Being a string literal, the array also holds a trailing NUL that is not part of the code.
static char decref_code[] =
"\x48\xff\x0f" // decq (%rdi)
"\x75\x07" // jne +7
"\x48\x8b\x47\x08" // mov 0x8(%rdi),%rax
"\xff\x50\x30" // callq *0x30(%rax)
;
// x86-64 machine-code template that is copied over each xdecref patchpoint (17 bytes of code).
// Same sequence as the decref template, preceded by a null check on %rdi: the je displacement
// of 12 skips the decq (3) + jne (2) + mov (4) + callq (3) bytes, i.e. the whole decref part.
// Being a string literal, the array also holds a trailing NUL that is not part of the code.
static char xdecref_code[] =
"\x48\x85\xff" // test %rdi,%rdi
"\x74\x0c" // je +12
"\x48\xff\x0f" // decq (%rdi)
"\x75\x07" // jne +7
"\x48\x8b\x47\x08" // mov 0x8(%rdi),%rax
"\xff\x50\x30" // callq *0x30(%rax)
;
#else // #ifdef Py_REF_DEBUG:
// Out-of-line wrapper around the Py_DECREF macro, so that there is a function address
// which the Py_REF_DEBUG decref code template can call.
static void _decref(Box* b) {
Py_DECREF(b);
}
// Py_REF_DEBUG variant of the decref template (12 bytes of code): load a 64-bit address into
// %rax and call it. The eight zero bytes after the two-byte movabs opcode are a placeholder;
// they are overwritten with the address of _decref by the static initializer object defined
// later in this file, which is why the array must stay writable.
static char decref_code[] =
"\x48\xb8\x00\x00\x00\x00\x00\x00\x00\x00" // movabs $0x00, %rax
"\xff\xd0" // callq *%rax
;
// Out-of-line wrapper around the Py_XDECREF macro, so that there is a function address
// which the Py_REF_DEBUG xdecref code template can call.
static void _xdecref(Box* b) {
Py_XDECREF(b);
}
// Py_REF_DEBUG variant of the xdecref template (12 bytes of code): load a 64-bit address into
// %rax and call it. The eight zero bytes after the two-byte movabs opcode are a placeholder;
// they are overwritten with the address of _xdecref by the static initializer object defined
// later in this file, which is why the array must stay writable.
static char xdecref_code[] =
"\x48\xb8\x00\x00\x00\x00\x00\x00\x00\x00" // movabs $0x00, %rax
"\xff\xd0" // callq *%rax
;
namespace {
// Fills in the address placeholders of the Py_REF_DEBUG code templates during static
// initialization.
//
// Both templates begin with the two opcode bytes of `movabs $imm64, %rax` (0x48 0xb8), followed
// by an 8-byte immediate that is all zeros in the source. The constructor overwrites that
// immediate with the address of the matching helper (_decref / _xdecref).
//
// The class name deliberately avoids a leading underscore followed by a capital letter, since
// such identifiers are reserved for the implementation.
class DecrefCodeInitializer {
public:
    DecrefCodeInitializer() {
        // The immediate operand starts right after the two movabs opcode bytes.
        const int imm_offset = 2;
        // The placeholder is exactly 8 bytes wide, so a pointer has to be that size too.
        static_assert(sizeof(void*) == 8, "movabs immediate placeholder is 8 bytes wide");

        void* p = (void*)&_decref;
        memcpy(decref_code + imm_offset, &p, sizeof(p));
        p = (void*)&_xdecref;
        memcpy(xdecref_code + imm_offset, &p, sizeof(p));
    }
} _i;
}
#endif
#endif
// Number of bytes copied over a decref / xdecref patchpoint: the length of the corresponding
// code template without the NUL terminator that the string literal appends.
// NOTE(review): these rely on a preceding `extern const int` declaration to get external
// linkage -- confirm the declaring header is included in this file.
const int DECREF_PP_SIZE = sizeof(decref_code) - 1; // -1 for the NUL byte
const int XDECREF_PP_SIZE = sizeof(xdecref_code) - 1; // -1 for the NUL byte
// Writes the fixed decref instruction sequence to `addr`.
// Exactly DECREF_PP_SIZE bytes are copied from the decref code template.
void emitDecref(void* addr) {
    const char* const code_template = decref_code;
    memcpy(addr, code_template, DECREF_PP_SIZE);
}
// Writes the fixed xdecref instruction sequence to `addr`.
// Exactly XDECREF_PP_SIZE bytes are copied from the xdecref code template.
void emitXDecref(void* addr) {
    const char* const code_template = xdecref_code;
    memcpy(addr, code_template, XDECREF_PP_SIZE);
}
void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
int nrecords = stackmap ? stackmap->records.size() : 0;
......@@ -150,6 +215,15 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
const StackMap::StackSizeRecord& stack_size_record = stackmap->stack_size_records[0];
int stack_size = stack_size_record.stack_size;
if (r->id == DECREF_PP_ID) {
emitDecref((uint8_t*)cf->code + r->offset);
continue;
}
if (r->id == XDECREF_PP_ID) {
emitXDecref((uint8_t*)cf->code + r->offset);
continue;
}
RELEASE_ASSERT(new_patchpoints.size() > r->id, "");
PatchpointInfo* pp = new_patchpoints[r->id].first;
......@@ -282,6 +356,9 @@ PatchpointInfo* PatchpointInfo::create(CompiledFunction* parent_cf, const ICSetu
auto* r = new PatchpointInfo(parent_cf, icinfo, num_ic_stackmap_args);
r->id = new_patchpoints.size();
new_patchpoints.push_back(std::make_pair(r, func_addr));
assert(r->id != DECREF_PP_ID);
assert(r->id != XDECREF_PP_ID);
return r;
}
......
......@@ -30,6 +30,11 @@ class CompilerType;
struct StackMap;
class TypeRecorder;
// Patchpoint ids reserved for the decref and xdecref patchpoints, plus the byte sizes of the
// code sequences that get patched over them (the sizes are defined out of line).
// NOTE(review): the ids are presumably chosen large enough to never collide with the ids
// handed out to regular patchpoints -- confirm.
static const int DECREF_PP_ID = 1000000;
extern const int DECREF_PP_SIZE;
static const int XDECREF_PP_ID = 1000001;
extern const int XDECREF_PP_SIZE;
static const int MAX_FRAME_SPILLS = 9; // TODO this shouldn't have to be larger than the set of non-callee-save args (9)
// except that we will currently spill the same reg multiple times
static const int CALL_ONLY_SIZE = 13 + 1; // 13 for the call, + 1 if we want to nop/trap
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment