Commit e7174486 authored by Marius Wachtler's avatar Marius Wachtler

Merge pull request #1227 from undingen/bjit_opt2

major bjit improvements
parents 820dd0cd 9fd4924f
......@@ -755,6 +755,7 @@ void Assembler::incq(Indirect mem) {
}
assert(src_idx >= 0 && src_idx < 8);
bool needssib = (src_idx == 0b100);
if (rex)
emitRex(rex);
......@@ -763,8 +764,12 @@ void Assembler::incq(Indirect mem) {
assert(-0x80 <= mem.offset && mem.offset < 0x80);
if (mem.offset == 0) {
emitModRM(0b00, 0, src_idx);
if (needssib)
emitSIB(0b00, 0b100, src_idx);
} else {
emitModRM(0b01, 0, src_idx);
if (needssib)
emitSIB(0b00, 0b100, src_idx);
emitByte(mem.offset);
}
}
......
......@@ -24,7 +24,7 @@
namespace pyston {
static const assembler::Register allocatable_regs[] = {
static const assembler::Register std_allocatable_regs[] = {
assembler::RAX, assembler::RCX, assembler::RDX,
// no RSP
// no RBP
......@@ -540,8 +540,8 @@ void Rewriter::_incref(RewriterVar* var, int num_refs) {
// this->_trap();
// this->_call(NULL, true, (void*)Helper::incref, llvm::ArrayRef<RewriterVar*>(&var, 1),
// llvm::ArrayRef<RewriterVar*>());
// this->_call(NULL, true, false /* can't throw */, (void*)Helper::incref, { var });
#ifdef Py_REF_DEBUG
// assembler->trap();
for (int i = 0; i < num_refs; ++i)
......@@ -569,16 +569,13 @@ void Rewriter::_decref(RewriterVar* var) {
assert(!var->nullable);
// assembler->trap();
// this->_call(NULL, true, (void*)Helper::decref, llvm::ArrayRef<RewriterVar*>(&var, 1),
// llvm::ArrayRef<RewriterVar*>(NULL, (int)0));
// this->_call(NULL, true, false /* can't throw */, (void*)Helper::decref, { var });
#ifdef Py_REF_DEBUG
// assembler->trap();
assembler->decq(assembler::Immediate(&_Py_RefTotal));
#endif
_setupCall(true, llvm::ArrayRef<RewriterVar*>(&var, 1), llvm::ArrayRef<RewriterVar*>(NULL, (size_t)0),
assembler::RAX);
_setupCall(true, { var }, {}, assembler::RAX);
#ifdef Py_REF_DEBUG
......@@ -612,8 +609,7 @@ void Rewriter::_xdecref(RewriterVar* var) {
assert(var->nullable);
// assembler->trap();
this->_call(NULL, true, (void*)Helper::xdecref, llvm::ArrayRef<RewriterVar*>(&var, 1),
llvm::ArrayRef<RewriterVar*>(NULL, (size_t)0));
this->_call(NULL, true, false /* can't throw */, (void*)Helper::xdecref, { var });
// Doesn't call bumpUse, since this function is designed to be callable from other emitting functions.
// (ie the caller should call bumpUse)
......@@ -632,14 +628,18 @@ void Rewriter::_cmp(RewriterVar* result, RewriterVar* v1, AST_TYPE::AST_TYPE cmp
if (LOG_IC_ASSEMBLY)
assembler->comment("_cmp");
assembler::Register v1_reg = v1->getInReg();
assembler::Register v2_reg = v2->getInReg();
assembler::Register v1_reg = v1->getInReg(Location::any(), false, dest);
assembler::Register v2_reg = v2->getInReg(Location::any(), false, dest);
assert(v1_reg != v2_reg); // TODO how do we ensure this?
v1->bumpUseEarlyIfPossible();
v2->bumpUseEarlyIfPossible();
assembler::Register newvar_reg = allocReg(dest);
// sete and setne has special register requirements (can't use r8-r15)
const assembler::Register valid_registers[] = {
assembler::RAX, assembler::RCX, assembler::RDX, assembler::RSI, assembler::RDI,
};
assembler::Register newvar_reg = allocReg(dest, Location::any(), valid_registers);
result->initializeInReg(newvar_reg);
assembler->cmp(v1_reg, v2_reg);
switch (cmp_type) {
......@@ -916,82 +916,10 @@ RewriterVar* Rewriter::loadConst(int64_t val, Location dest) {
return const_loader_var;
}
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr) {
STAT_TIMER(t0, "us_timer_rewriter", 10);
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
return call(has_side_effects, func_addr, args, args_xmm);
}
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0) {
STAT_TIMER(t0, "us_timer_rewriter", 10);
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
return call(has_side_effects, func_addr, args, args_xmm);
}
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1) {
STAT_TIMER(t0, "us_timer_rewriter", 10);
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
args.push_back(arg1);
return call(has_side_effects, func_addr, args, args_xmm);
}
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1,
RewriterVar* arg2) {
STAT_TIMER(t0, "us_timer_rewriter", 10);
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
args.push_back(arg1);
args.push_back(arg2);
return call(has_side_effects, func_addr, args, args_xmm);
}
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1,
RewriterVar* arg2, RewriterVar* arg3) {
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, llvm::ArrayRef<RewriterVar*> args,
llvm::ArrayRef<RewriterVar*> args_xmm, llvm::ArrayRef<RewriterVar*> additional_uses) {
STAT_TIMER(t0, "us_timer_rewriter", 10);
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
args.push_back(arg1);
args.push_back(arg2);
args.push_back(arg3);
return call(has_side_effects, func_addr, args, args_xmm);
}
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1,
RewriterVar* arg2, RewriterVar* arg3, RewriterVar* arg4) {
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
args.push_back(arg1);
args.push_back(arg2);
args.push_back(arg3);
args.push_back(arg4);
return call(has_side_effects, func_addr, args, args_xmm);
}
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, const RewriterVar::SmallVector& args,
const RewriterVar::SmallVector& args_xmm) {
RewriterVar* result = createNewVar();
RewriterVar::SmallVector uses;
for (RewriterVar* v : args) {
assert(v != NULL);
uses.push_back(v);
}
for (RewriterVar* v : args_xmm) {
assert(v != NULL);
uses.push_back(v);
}
ActionType type;
if (has_side_effects)
......@@ -999,33 +927,65 @@ RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, const Rewrit
else
type = ActionType::NORMAL;
// It's not nice to pass llvm::SmallVectors through a closure, especially with our SmallFunction
// optimization, so just regionAlloc them and copy the data in:
RewriterVar** _args = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * args.size());
memcpy(_args, args.begin(), sizeof(RewriterVar*) * args.size());
RewriterVar** _args_xmm = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * args_xmm.size());
memcpy(_args_xmm, args_xmm.begin(), sizeof(RewriterVar*) * args_xmm.size());
int args_size = args.size();
assert(args_xmm.size() <= 0x7fff);
// Hack: pack this into a short to make sure it fits in the closure
short xmm_args_size = args_xmm.size();
// TODO: we don't need to generate the decref info for calls which can't throw
bool can_throw = true;
auto args_array_ref = regionAllocArgs(args, args_xmm, additional_uses);
// Hack: explicitly order the closure arguments so they pad nicer
addAction([args_size, xmm_args_size, has_side_effects, this, result, func_addr, _args, _args_xmm]() {
this->_call(result, has_side_effects, func_addr, llvm::ArrayRef<RewriterVar*>(_args, args_size),
llvm::ArrayRef<RewriterVar*>(_args_xmm, xmm_args_size));
for (int i = 0; i < args_size; i++)
_args[i]->bumpUse();
for (int i = 0; i < xmm_args_size; i++)
_args_xmm[i]->bumpUse();
}, uses, type);
struct LambdaClosure {
RewriterVar** args_array;
struct {
unsigned int has_side_effects : 1;
unsigned int can_throw : 1;
unsigned int num_args : 16;
unsigned int num_args_xmm : 16;
unsigned int num_additional_uses : 16;
};
llvm::ArrayRef<RewriterVar*> allArgs() const {
return llvm::makeArrayRef(args_array, num_args + num_args_xmm + num_additional_uses);
}
llvm::ArrayRef<RewriterVar*> args() const { return allArgs().slice(0, num_args); }
llvm::ArrayRef<RewriterVar*> argsXmm() const { return allArgs().slice(num_args, num_args_xmm); }
llvm::ArrayRef<RewriterVar*> additionalUses() const {
return allArgs().slice((int)num_args + (int)num_args_xmm, num_additional_uses);
}
LambdaClosure(llvm::MutableArrayRef<RewriterVar*> args_array_ref, llvm::ArrayRef<RewriterVar*> _args,
llvm::ArrayRef<RewriterVar*> _args_xmm, llvm::ArrayRef<RewriterVar*> _addition_uses,
bool has_side_effects, bool can_throw)
: args_array(args_array_ref.data()),
has_side_effects(has_side_effects),
can_throw(can_throw),
num_args(_args.size()),
num_args_xmm(_args_xmm.size()),
num_additional_uses(_addition_uses.size()) {
assert(_args.size() < 1 << 16);
assert(_args_xmm.size() < 1 << 16);
assert(_addition_uses.size() < 1 << 16);
}
} lambda_closure(args_array_ref, args, args_xmm, additional_uses, has_side_effects, can_throw);
assert(lambda_closure.args().size() == args.size());
assert(lambda_closure.argsXmm().size() == args_xmm.size());
assert(lambda_closure.additionalUses().size() == additional_uses.size());
addAction([this, result, func_addr, lambda_closure]() {
this->_call(result, lambda_closure.has_side_effects, lambda_closure.can_throw, func_addr, lambda_closure.args(),
lambda_closure.argsXmm(), lambda_closure.allArgs());
}, lambda_closure.allArgs(), type);
return result;
}
void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> args,
llvm::ArrayRef<RewriterVar*> args_xmm, Location preserve) {
llvm::ArrayRef<RewriterVar*> args_xmm, Location preserve,
llvm::ArrayRef<RewriterVar*> bump_if_possible) {
if (has_side_effects)
assert(done_guarding);
......@@ -1112,6 +1072,10 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> ar
}
#endif
for (auto&& use : bump_if_possible) {
use->bumpUseEarlyIfPossible();
}
// Spill caller-saved registers:
for (auto check_reg : caller_save_registers) {
// check_reg.dump();
......@@ -1173,8 +1137,9 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> ar
#endif
}
void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr, llvm::ArrayRef<RewriterVar*> args,
llvm::ArrayRef<RewriterVar*> args_xmm) {
void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr,
llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm,
llvm::ArrayRef<RewriterVar*> vars_to_bump) {
if (LOG_IC_ASSEMBLY)
assembler->comment("_call");
......@@ -1183,17 +1148,7 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr
if (failed)
return;
_setupCall(has_side_effects, args, args_xmm, assembler::R11);
// This duty is now on the caller:
/*
for (RewriterVar* arg : args) {
arg->bumpUse();
}
for (RewriterVar* arg_xmm : args_xmm) {
arg_xmm->bumpUse();
}
*/
_setupCall(has_side_effects, args, args_xmm, assembler::R11, vars_to_bump);
assertConsistent();
......@@ -1211,8 +1166,8 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr
assert(assembler->hasFailed() || asm_address == (uint64_t)assembler->curInstPointer());
}
// TODO: we don't need to generate the decref info for calls which can't throw
registerDecrefInfoHere();
if (can_throw)
registerDecrefInfoHere();
if (!failed) {
assert(vars_by_location.count(assembler::RAX) == 0);
......@@ -1224,23 +1179,38 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr
if (result)
result->releaseIfNoUses();
for (RewriterVar* var : vars_to_bump) {
var->bumpUseLateIfNecessary();
}
}
std::vector<Location> Rewriter::getDecrefLocations() {
std::vector<Location> decref_infos;
for (RewriterVar& var : vars) {
if (var.locations.size() && var.needsDecref()) {
// TODO: add code to handle other location types and choose best location if there are several
Location l = *var.locations.begin();
if (l.type == Location::Scratch) {
// convert to stack based location because later on we may not know the offset of the scratch area from
// the SP.
decref_infos.emplace_back(Location::Stack, indirectFor(l).offset);
} else if (l.type == Location::Register) {
// CSRs shouldn't be getting allocated, and we should only be calling this at a callsite:
RELEASE_ASSERT(0, "we shouldn't be trying to decref anything in a register");
} else
RELEASE_ASSERT(0, "not implemented");
if (var.locations.size() && var.needsDecref(current_action_idx)) {
bool found_location = false;
for (Location l : var.locations) {
if (l.type == Location::Scratch) {
// convert to stack based location because later on we may not know the offset of the scratch area
// from the SP.
decref_infos.emplace_back(Location::Stack, indirectFor(l).offset);
found_location = true;
break;
} else if (l.type == Location::Register) {
// we only allow registers which are not clobbered by a call
if (l.isClobberedByCall())
continue;
decref_infos.emplace_back(l);
found_location = true;
break;
} else
RELEASE_ASSERT(0, "not implemented");
}
if (!found_location) {
// this is very rare. just fail the rewrite for now
failed = true;
}
}
}
......@@ -1335,16 +1305,25 @@ void RewriterVar::refConsumed(RewriterAction* action) {
last_refconsumed_numuses = uses.size();
if (!action)
action = rewriter->getLastAction();
action->consumed_refs.emplace_back(this);
action->consumed_refs.push_front(this);
}
void RewriterVar::refUsed() {
// TODO: This is a pretty silly implementation that might prevent other optimizations?
rewriter->addAction([=]() { this->bumpUse(); }, { this }, ActionType::NORMAL);
}
bool RewriterVar::needsDecref(int current_action_index) {
rewriter->assertPhaseEmitting();
if (reftype != RefType::OWNED)
return false;
// if nothing consumes this reference we need to create a decref entry
if (num_refs_consumed == 0)
return true;
bool RewriterVar::needsDecref() {
return reftype == RefType::OWNED && !this->refHandedOff();
// don't create decref entry if the currenty action hands off the ownership
int reference_handed_off_action_index = uses[last_refconsumed_numuses - 1];
if (reference_handed_off_action_index == current_action_index)
return false;
return true;
}
void RewriterVar::registerOwnedAttr(int byte_offset) {
......@@ -1497,6 +1476,7 @@ void Rewriter::commit() {
_incref(var, 1);
}
current_action_idx = i;
actions[i].action();
if (failed) {
......@@ -1926,7 +1906,7 @@ void Rewriter::_checkAndThrowCAPIException(RewriterVar* r, int64_t exc_val, asse
} else
assembler->cmp(var_reg, assembler::Immediate(exc_val), type);
_setupCall(false, RewriterVar::SmallVector(), RewriterVar::SmallVector());
_setupCall(false, {});
{
assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO);
assembler->mov(assembler::Immediate((void*)throwCAPIException), assembler::R11);
......@@ -2004,6 +1984,11 @@ void Rewriter::spillRegister(assembler::XMMRegister reg) {
}
assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
return allocReg(dest, otherThan, allocatable_regs);
}
assembler::Register Rewriter::allocReg(Location dest, Location otherThan,
llvm::ArrayRef<assembler::Register> valid_registers) {
assertPhaseEmitting();
if (dest.type == Location::AnyReg) {
......@@ -2012,7 +1997,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
assembler::Register best_reg(0);
// TODO prioritize spilling a constant register?
for (assembler::Register reg : allocatable_regs) {
for (assembler::Register reg : valid_registers) {
if (Location(reg) != otherThan) {
if (vars_by_location.count(reg) == 0) {
return reg;
......@@ -2042,6 +2027,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
assert(failed || vars_by_location.count(best_reg) == 0);
return best_reg;
} else if (dest.type == Location::Register) {
assert(std::find(valid_registers.begin(), valid_registers.end(), dest.asRegister()) != valid_registers.end());
assembler::Register reg(dest.regnum);
if (vars_by_location.count(reg)) {
......@@ -2193,12 +2179,14 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const L
return_location(this->rewrite->returnRegister()),
failed(false),
needs_invalidation_support(needs_invalidation_support),
current_action_idx(-1),
added_changing_action(false),
marked_inside_ic(false),
done_guarding(false),
last_guard_action(-1),
offset_eq_jmp_slowpath(-1),
offset_ne_jmp_slowpath(-1) {
offset_ne_jmp_slowpath(-1),
allocatable_regs(std_allocatable_regs) {
initPhaseCollecting();
finished = false;
......
......@@ -16,6 +16,7 @@
#define PYSTON_ASMWRITING_REWRITER_H
#include <deque>
#include <forward_list>
#include <list>
#include <map>
#include <memory>
......@@ -192,8 +193,6 @@ public:
// if no action is specified it will assume the last action consumed the reference
void refConsumed(RewriterAction* action = NULL);
void refUsed();
// registerOwnedAttr tells the refcounter that a certain memory location holds a pointer
// to an owned reference. This must be paired with a call to deregisterOwnedAttr
// Call these right before emitting the store (for register) or decref (for deregister).
......@@ -237,11 +236,11 @@ private:
// /* some code */
// bumpUseLateIfNecessary();
void bumpUseEarlyIfPossible() {
if (reftype != RefType::OWNED)
if (reftype != RefType::OWNED && !hasScratchAllocation())
bumpUse();
}
void bumpUseLateIfNecessary() {
if (reftype == RefType::OWNED)
if (reftype == RefType::OWNED || hasScratchAllocation())
bumpUse();
}
......@@ -254,7 +253,7 @@ private:
bool isDoneUsing() { return next_use == uses.size(); }
bool hasScratchAllocation() const { return scratch_allocation.second > 0; }
void resetHasScratchAllocation() { scratch_allocation = std::make_pair(0, 0); }
bool needsDecref();
bool needsDecref(int current_action_index);
// Indicates if this variable is an arg, and if so, what location the arg is from.
bool is_arg;
......@@ -339,8 +338,9 @@ public:
class RewriterAction {
public:
SmallFunction<56> action;
std::vector<RewriterVar*> consumed_refs;
SmallFunction<48> action;
std::forward_list<RewriterVar*> consumed_refs;
template <typename F> RewriterAction(F&& action) : action(std::forward<F>(action)) {}
......@@ -367,7 +367,33 @@ private:
protected:
// Allocates `bytes` bytes of data. The allocation will get freed when the rewriter gets freed.
void* regionAlloc(size_t bytes) { return allocator.Allocate(bytes, 16 /* alignment */); }
void* regionAlloc(size_t bytes, int alignment = 16) { return allocator.Allocate(bytes, alignment); }
template <typename T> llvm::MutableArrayRef<T> regionAlloc(size_t num_elements) {
return llvm::MutableArrayRef<T>(allocator.Allocate<T>(num_elements), num_elements);
}
// This takes a variable number of llvm::ArrayRef<RewriterVar*> and copies in all elements into a single contiguous
// memory location.
template <typename... Args>
llvm::MutableArrayRef<RewriterVar*> regionAllocArgs(llvm::ArrayRef<RewriterVar*> arg1, Args... args) {
size_t num_total_args = 0;
for (auto&& array : { arg1, args... }) {
num_total_args += array.size();
}
if (num_total_args == 0)
return llvm::MutableArrayRef<RewriterVar*>();
auto args_array_ref = regionAlloc<RewriterVar*>(num_total_args);
auto insert_point = args_array_ref;
for (auto&& array : { arg1, args... }) {
if (!array.empty()) {
memcpy(insert_point.data(), array.data(), array.size() * sizeof(RewriterVar*));
insert_point = insert_point.slice(array.size());
}
}
assert(insert_point.size() == 0);
return args_array_ref;
}
// Helps generating the best code for loading a const integer value.
// By keeping track of the last known value of every register and reusing it.
......@@ -432,6 +458,8 @@ protected:
bool needs_invalidation_support = true);
std::deque<RewriterAction> actions;
int current_action_idx; // in the emitting phase get's set to index of currently executed action
template <typename F> RewriterAction* addAction(F&& action, llvm::ArrayRef<RewriterVar*> vars, ActionType type) {
assertPhaseCollecting();
for (RewriterVar* var : vars) {
......@@ -483,6 +511,8 @@ protected:
// Allocates a register. dest must be of type Register or AnyReg
// If otherThan is a register, guaranteed to not use that register.
assembler::Register allocReg(Location dest, Location otherThan = Location::any());
assembler::Register allocReg(Location dest, Location otherThan,
llvm::ArrayRef<assembler::Register> valid_registers);
assembler::XMMRegister allocXMMReg(Location dest, Location otherThan = Location::any());
// Allocates an 8-byte region in the scratch space
Location allocScratch();
......@@ -507,11 +537,13 @@ protected:
void _slowpathJump(bool condition_eq);
void _trap();
void _loadConst(RewriterVar* result, int64_t val);
void _setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm,
Location preserve = Location::any());
void _setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> args = {},
llvm::ArrayRef<RewriterVar*> args_xmm = {}, Location preserve = Location::any(),
llvm::ArrayRef<RewriterVar*> bump_if_possible = {});
// _call does not call bumpUse on its arguments:
void _call(RewriterVar* result, bool has_side_effects, void* func_addr, llvm::ArrayRef<RewriterVar*> args,
llvm::ArrayRef<RewriterVar*> args_xmm);
void _call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr,
llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm = {},
llvm::ArrayRef<RewriterVar*> vars_to_bump = {});
void _add(RewriterVar* result, RewriterVar* a, int64_t b, Location dest);
int _allocate(RewriterVar* result, int n);
void _allocateAndCopy(RewriterVar* result, RewriterVar* array, int n);
......@@ -565,6 +597,8 @@ protected:
#endif
}
llvm::ArrayRef<assembler::Register> allocatable_regs;
public:
// This should be called exactly once for each argument
RewriterVar* getArg(int argnum);
......@@ -606,16 +640,13 @@ public:
// 2) does not have any side-effects that would be user-visible if we bailed out from the middle of the
// inline cache. (Extra allocations don't count even though they're potentially visible if you look
// hard enough.)
RewriterVar* call(bool has_side_effects, void* func_addr, const RewriterVar::SmallVector& args,
const RewriterVar::SmallVector& args_xmm = RewriterVar::SmallVector());
RewriterVar* call(bool has_side_effects, void* func_addr);
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0);
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1);
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2);
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2,
RewriterVar* arg3);
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2,
RewriterVar* arg3, RewriterVar* arg4);
RewriterVar* call(bool has_side_effects, void* func_addr, llvm::ArrayRef<RewriterVar*> args = {},
llvm::ArrayRef<RewriterVar*> args_xmm = {}, llvm::ArrayRef<RewriterVar*> additional_uses = {});
template <typename... Args>
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg1, Args... args) {
return call(has_side_effects, func_addr, llvm::ArrayRef<RewriterVar*>({ arg1, args... }), {});
}
RewriterVar* add(RewriterVar* a, int64_t b, Location dest);
// Allocates n pointer-sized stack slots:
RewriterVar* allocate(int n);
......
......@@ -458,7 +458,7 @@ void ASTInterpreter::doStore(AST_Name* node, STOLEN(Value) value) {
ScopeInfo::VarScopeType vst = node->lookup_type;
if (vst == ScopeInfo::VarScopeType::GLOBAL) {
if (jit)
jit->emitSetGlobal(frame_info.globals, name.getBox(), value);
jit->emitSetGlobal(name.getBox(), value, getMD()->source->scoping->areGlobalsFromModule());
setGlobal(frame_info.globals, name.getBox(), value.o);
} else if (vst == ScopeInfo::VarScopeType::NAME) {
if (jit)
......@@ -471,13 +471,12 @@ void ASTInterpreter::doStore(AST_Name* node, STOLEN(Value) value) {
bool closure = vst == ScopeInfo::VarScopeType::CLOSURE;
if (jit) {
bool is_live = true;
// TODO: turn this optimization back on.
// if (!closure)
// is_live = source_info->getLiveness()->isLiveAtEnd(name, current_block);
if (!closure)
is_live = source_info->getLiveness()->isLiveAtEnd(name, current_block);
if (is_live)
jit->emitSetLocal(name, node->vreg, closure, value);
else
jit->emitSetBlockLocal(name, value);
jit->emitSetBlockLocal(name, node->vreg, value);
}
if (closure) {
......@@ -686,12 +685,12 @@ Value ASTInterpreter::visit_jump(AST_Jump* node) {
if (backedge)
++edgecount;
if (ENABLE_BASELINEJIT && backedge && edgecount == OSR_THRESHOLD_INTERPRETER && !jit && !node->target->code) {
if (ENABLE_BASELINEJIT && backedge && edgecount >= OSR_THRESHOLD_INTERPRETER && !jit && !node->target->code) {
should_jit = true;
startJITing(node->target);
}
if (backedge && edgecount == OSR_THRESHOLD_BASELINE) {
if (backedge && edgecount >= OSR_THRESHOLD_BASELINE) {
Box* rtn = doOSR(node);
if (rtn)
return Value(rtn, NULL);
......@@ -1173,12 +1172,9 @@ Value ASTInterpreter::createFunction(AST* node, AST_arguments* args, const std::
closure_var = jit->imm(0ul);
if (!passed_globals_var)
passed_globals_var = jit->imm(0ul);
rtn.var = jit->call(false, (void*)createFunctionFromMetadata, jit->imm(md), closure_var, passed_globals_var,
defaults_var, jit->imm(args->defaults.size()))->setType(RefType::OWNED);
for (auto d_var : defaults_vars) {
d_var->refUsed();
}
rtn.var = jit->call(false, (void*)createFunctionFromMetadata, { jit->imm(md), closure_var, passed_globals_var,
defaults_var, jit->imm(args->defaults.size()) },
{}, defaults_vars)->setType(RefType::OWNED);
}
rtn.o = createFunctionFromMetadata(md, closure, passed_globals, u.il);
......@@ -1661,7 +1657,7 @@ Value ASTInterpreter::visit_name(AST_Name* node) {
assert(!node->is_kill);
Value v;
if (jit)
v.var = jit->emitGetGlobal(frame_info.globals, node->id.getBox());
v.var = jit->emitGetGlobal(node->id.getBox());
v.o = getGlobal(frame_info.globals, node->id.getBox());
return v;
......@@ -1781,6 +1777,10 @@ int ASTInterpreterJitInterface::getBoxedLocalsOffset() {
return offsetof(ASTInterpreter, frame_info.boxedLocals);
}
int ASTInterpreterJitInterface::getCreatedClosureOffset() {
return offsetof(ASTInterpreter, created_closure);
}
int ASTInterpreterJitInterface::getCurrentBlockOffset() {
return offsetof(ASTInterpreter, current_block);
}
......
......@@ -39,6 +39,7 @@ struct ASTInterpreterJitInterface {
static constexpr uint64_t osr_dummy_value = -1;
static int getBoxedLocalsOffset();
static int getCreatedClosureOffset();
static int getCurrentBlockOffset();
static int getCurrentInstOffset();
static int getEdgeCountOffset();
......
......@@ -16,6 +16,7 @@
#include <llvm/ADT/DenseMap.h>
#include <llvm/ADT/DenseSet.h>
#include <sys/mman.h>
#include "codegen/irgen/hooks.h"
#include "codegen/memmgr.h"
......@@ -38,30 +39,43 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations;
//
// long foo(char* c);
// void bjit() {
// asm volatile ("" ::: "r15");
// asm volatile ("" ::: "r14");
// asm volatile ("" ::: "r13");
// asm volatile ("" ::: "r12");
// char scratch[256+16];
// foo(scratch);
// }
//
// It omits the frame pointer but saves r13 and r14
// It omits the frame pointer but saves r12, r13, r14 and r15
// use 'objdump -s -j .eh_frame <obj.file>' to dump it
const unsigned char eh_info[]
= { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10,
0x01, 0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42,
0x0e, 0x18, 0x47, 0x0e, 0xb0, 0x02, 0x8d, 0x03, 0x8e, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 };
= { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01,
0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42, 0x0e, 0x18, 0x42,
0x0e, 0x20, 0x42, 0x0e, 0x28, 0x47, 0x0e, 0xc0, 0x02, 0x8c, 0x05, 0x8d, 0x04, 0x8e, 0x03, 0x8f,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!");
static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!");
constexpr int code_size = JitCodeBlock::memory_size - sizeof(eh_info);
JitCodeBlock::MemoryManager::MemoryManager() {
int protection = PROT_READ | PROT_WRITE | PROT_EXEC;
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if ENABLE_BASELINEJIT_MAP_32BIT
flags |= MAP_32BIT;
#endif
addr = (uint8_t*)mmap(NULL, JitCodeBlock::memory_size, protection, flags, -1, 0);
}
JitCodeBlock::MemoryManager::~MemoryManager() {
munmap(addr, JitCodeBlock::memory_size);
addr = NULL;
}
JitCodeBlock::JitCodeBlock(llvm::StringRef name)
: memory(new uint8_t[memory_size]),
entry_offset(0),
a(memory.get() + sizeof(eh_info), code_size),
is_currently_writing(false),
asm_failed(false) {
: entry_offset(0), a(memory.get() + sizeof(eh_info), code_size), is_currently_writing(false), asm_failed(false) {
static StatCounter num_jit_code_blocks("num_baselinejit_code_blocks");
num_jit_code_blocks.log();
static StatCounter num_jit_total_bytes("num_baselinejit_total_bytes");
......@@ -70,8 +84,10 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
uint8_t* code = a.curInstPointer();
// emit prolog
a.push(assembler::R15);
a.push(assembler::R14);
a.push(assembler::R13);
a.push(assembler::R12);
static_assert(sp_adjustment % 16 == 8, "stack isn't aligned");
a.sub(assembler::Immediate(sp_adjustment), assembler::RSP);
a.mov(assembler::RDI, assembler::R13); // interpreter pointer
......@@ -136,6 +152,12 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping
ic_info.appendDecrefInfosTo(decref_infos);
}
static const assembler::Register bjit_allocatable_regs[]
= { assembler::RAX, assembler::RCX, assembler::RDX,
// no RSP
// no RBP
assembler::RDI, assembler::RSI, assembler::R8, assembler::R9,
assembler::R10, assembler::R11, assembler::R12, assembler::R15 };
JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info,
std::unique_ptr<ICSlotRewrite> rewrite, int code_offset, int num_bytes_overlapping,
......@@ -149,6 +171,8 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic
code_block(code_block),
interp(0),
ic_info(std::move(ic_info)) {
allocatable_regs = bjit_allocatable_regs;
if (LOG_BJIT_ASSEMBLY)
comment("BJIT: JitFragmentWriter() start");
interp = createNewVar();
......@@ -198,10 +222,10 @@ RewriterVar* JitFragmentWriter::emitCallattr(AST_expr* node, RewriterVar* obj, B
call_args.push_back(args.size() > 1 ? args[1] : imm(0ul));
call_args.push_back(args.size() > 2 ? args[2] : imm(0ul));
llvm::ArrayRef<RewriterVar*> additional_uses;
if (args.size() > 3) {
RewriterVar* scratch = allocate(args.size() - 3);
for (int i = 0; i < args.size() - 3; ++i)
scratch->setAttr(i * sizeof(void*), args[i + 3], RewriterVar::SetattrType::REF_USED);
additional_uses = args.slice(3);
RewriterVar* scratch = allocArgs(additional_uses, RewriterVar::SetattrType::REF_USED);
call_args.push_back(scratch);
} else if (keyword_names) {
call_args.push_back(imm(0ul));
......@@ -210,11 +234,8 @@ RewriterVar* JitFragmentWriter::emitCallattr(AST_expr* node, RewriterVar* obj, B
if (keyword_names)
call_args.push_back(imm(keyword_names));
auto r = emitPPCall((void*)callattr, call_args, 2, 640, node, type_recorder).first->setType(RefType::OWNED);
for (int i = 3; i < args.size(); i++) {
args[i]->refUsed();
}
return r;
return emitPPCall((void*)callattr, call_args, 2, 640, node, type_recorder, additional_uses)
.first->setType(RefType::OWNED);
#else
// We could make this faster but for now: keep it simple, stupid...
RewriterVar* attr_var = imm(attr);
......@@ -223,7 +244,7 @@ RewriterVar* JitFragmentWriter::emitCallattr(AST_expr* node, RewriterVar* obj, B
RewriterVar* args_array = nullptr;
if (args.size())
args_array = allocArgs(args);
args_array = allocArgs(args, RewriterVar::SetattrType::REF_USED);
else
RELEASE_ASSERT(!keyword_names_var, "0 args but keyword names are set");
......@@ -244,7 +265,10 @@ RewriterVar* JitFragmentWriter::emitCallattr(AST_expr* node, RewriterVar* obj, B
}
RewriterVar* JitFragmentWriter::emitCompare(AST_expr* node, RewriterVar* lhs, RewriterVar* rhs, int op_type) {
// TODO: can directly emit the assembly for Is/IsNot
if (op_type == AST_TYPE::Is || op_type == AST_TYPE::IsNot) {
RewriterVar* cmp_result = lhs->cmp(op_type == AST_TYPE::IsNot ? AST_TYPE::NotEq : AST_TYPE::Eq, rhs);
return call(false, (void*)boxBool, cmp_result)->setType(RefType::OWNED);
}
return emitPPCall((void*)compare, { lhs, rhs, imm(op_type) }, 2, 240, node).first->setType(RefType::OWNED);
}
......@@ -258,42 +282,9 @@ void JitFragmentWriter::emitDictSet(RewriterVar* dict, RewriterVar* k, RewriterV
v->refConsumed();
}
// TODO: merge this function's functionality with refUsed
RewriterVar* JitFragmentWriter::emitCallWithAllocatedArgs(void* func_addr, const llvm::ArrayRef<RewriterVar*> args,
const llvm::ArrayRef<RewriterVar*> additional_uses) {
RewriterVar* result = createNewVar();
RewriterVar::SmallVector uses;
for (RewriterVar* v : args) {
assert(v != NULL);
uses.push_back(v);
}
for (RewriterVar* v : additional_uses) {
assert(v != NULL);
uses.push_back(v);
}
// It's not nice to pass llvm::SmallVectors through a closure, especially with our SmallFunction
// optimization, so just regionAlloc them and copy the data in:
RewriterVar** _args = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * args.size());
memcpy(_args, args.begin(), sizeof(RewriterVar*) * args.size());
RewriterVar** _args_additional = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * additional_uses.size());
memcpy(_args_additional, additional_uses.begin(), sizeof(RewriterVar*) * additional_uses.size());
int args_size = args.size();
assert(additional_uses.size() <= 0x7fff);
// Hack: pack this into a short to make sure it fits in the closure
short args_additional_size = additional_uses.size();
// Hack: explicitly order the closure arguments so they pad nicer
addAction([args_size, args_additional_size, func_addr, this, result, _args, _args_additional]() {
this->_call(result, false, func_addr, llvm::ArrayRef<RewriterVar*>(_args, args_size),
llvm::ArrayRef<RewriterVar*>());
for (int i = 0; i < args_size; i++)
_args[i]->bumpUse();
for (int i = 0; i < args_additional_size; i++)
_args_additional[i]->bumpUse();
}, uses, ActionType::NORMAL);
return result;
return call(false, func_addr, args, {}, additional_uses);
}
RewriterVar* JitFragmentWriter::emitCreateList(const llvm::ArrayRef<RewriterVar*> values) {
......@@ -361,7 +352,7 @@ RewriterVar* JitFragmentWriter::emitExceptionMatches(RewriterVar* v, RewriterVar
}
RewriterVar* JitFragmentWriter::emitGetAttr(RewriterVar* obj, BoxedString* s, AST_expr* node) {
return emitPPCall((void*)getattr, { obj, imm(s) }, 2, 512, node, getTypeRecorderForNode(node))
return emitPPCall((void*)getattr, { obj, imm(s) }, 2, 256, node, getTypeRecorderForNode(node))
.first->setType(RefType::OWNED);
}
......@@ -369,17 +360,16 @@ RewriterVar* JitFragmentWriter::emitGetBlockLocal(InternedString s, int vreg) {
auto it = local_syms.find(s);
if (it == local_syms.end()) {
auto r = emitGetLocal(s, vreg);
// TODO: clear out the vreg?
// assert(r->reftype == RefType::OWNED);
// emitSetLocal(s, vreg, false, imm(nullptr));
// emitSetBlockLocal(s, r);
assert(r->reftype == RefType::OWNED);
emitSetBlockLocal(s, vreg, r);
return r;
}
return it->second;
}
void JitFragmentWriter::emitKillTemporary(InternedString s, int vreg) {
emitSetLocal(s, vreg, false, imm(nullptr));
if (!local_syms.count(s))
emitSetLocal(s, vreg, false, imm(nullptr));
}
RewriterVar* JitFragmentWriter::emitGetBoxedLocal(BoxedString* s) {
......@@ -393,23 +383,21 @@ RewriterVar* JitFragmentWriter::emitGetBoxedLocals() {
}
RewriterVar* JitFragmentWriter::emitGetClsAttr(RewriterVar* obj, BoxedString* s) {
return emitPPCall((void*)getclsattr, { obj, imm(s) }, 2, 512).first->setType(RefType::OWNED);
return emitPPCall((void*)getclsattr, { obj, imm(s) }, 2, 256).first->setType(RefType::OWNED);
}
RewriterVar* JitFragmentWriter::emitGetGlobal(Box* global, BoxedString* s) {
RewriterVar* JitFragmentWriter::emitGetGlobal(BoxedString* s) {
if (s->s() == "None") {
RewriterVar* r = imm(None)->setType(RefType::BORROWED);
return r;
}
RewriterVar* args[] = { NULL, NULL };
args[0] = imm(global);
args[1] = imm(s);
return emitPPCall((void*)getGlobal, args, 2, 512).first->setType(RefType::OWNED);
RewriterVar* globals = getInterp()->getAttr(ASTInterpreterJitInterface::getGlobalsOffset());
return emitPPCall((void*)getGlobal, { globals, imm(s) }, 1, 128).first->setType(RefType::OWNED);
}
RewriterVar* JitFragmentWriter::emitGetItem(AST_expr* node, RewriterVar* value, RewriterVar* slice) {
return emitPPCall((void*)getitem, { value, slice }, 2, 512, node).first->setType(RefType::OWNED);
return emitPPCall((void*)getitem, { value, slice }, 1, 256, node).first->setType(RefType::OWNED);
}
RewriterVar* JitFragmentWriter::emitGetLocal(InternedString s, int vreg) {
......@@ -453,11 +441,11 @@ RewriterVar* JitFragmentWriter::emitLandingpad() {
RewriterVar* JitFragmentWriter::emitNonzero(RewriterVar* v) {
// nonzeroHelper returns bool
return call(false, (void*)nonzeroHelper, v)->setType(RefType::OWNED);
return call(false, (void*)nonzeroHelper, v)->setType(RefType::BORROWED);
}
RewriterVar* JitFragmentWriter::emitNotNonzero(RewriterVar* v) {
return call(false, (void*)notHelper, v)->setType(RefType::OWNED);
return call(false, (void*)notHelper, v)->setType(RefType::BORROWED);
}
RewriterVar* JitFragmentWriter::emitRepr(RewriterVar* v) {
......@@ -478,21 +466,18 @@ RewriterVar* JitFragmentWriter::emitRuntimeCall(AST_expr* node, RewriterVar* obj
call_args.push_back(args.size() > 1 ? args[1] : imm(0ul));
call_args.push_back(args.size() > 2 ? args[2] : imm(0ul));
llvm::ArrayRef<RewriterVar*> additional_uses;
if (args.size() > 3) {
RewriterVar* scratch = allocate(args.size() - 3);
for (int i = 0; i < args.size() - 3; ++i)
scratch->setAttr(i * sizeof(void*), args[i + 3], RewriterVar::SetattrType::REF_USED);
additional_uses = args.slice(3);
RewriterVar* scratch = allocArgs(additional_uses, RewriterVar::SetattrType::REF_USED);
call_args.push_back(scratch);
} else
call_args.push_back(imm(0ul));
if (keyword_names)
call_args.push_back(imm(keyword_names));
auto r = emitPPCall((void*)runtimeCall, call_args, 2, 640, node, type_recorder).first->setType(RefType::OWNED);
for (int i = 3; i < args.size(); i++) {
args[i]->refUsed();
}
return r;
return emitPPCall((void*)runtimeCall, call_args, 2, 640, node, type_recorder, additional_uses)
.first->setType(RefType::OWNED);
#else
RewriterVar* argspec_var = imm(argspec.asInt());
RewriterVar* keyword_names_var = keyword_names ? imm(keyword_names) : nullptr;
......@@ -535,22 +520,40 @@ std::vector<RewriterVar*> JitFragmentWriter::emitUnpackIntoArray(RewriterVar* v,
}
RewriterVar* JitFragmentWriter::emitYield(RewriterVar* v) {
auto rtn = call(false, (void*)ASTInterpreterJitInterface::yieldHelper, getInterp(), v)->setType(RefType::OWNED);
llvm::SmallVector<RewriterVar*, 16> local_args;
local_args.push_back(interp->getAttr(ASTInterpreterJitInterface::getCreatedClosureOffset()));
// we have to pass all owned references which are not stored in the vregs to yield() so that the GC can traverse it
for (auto&& sym : local_syms) {
if (sym.second == v)
continue;
if (sym.second->reftype == RefType::OWNED)
local_args.push_back(sym.second);
}
// erase duplicate entries
std::sort(local_args.begin(), local_args.end());
local_args.erase(std::unique(local_args.begin(), local_args.end()), local_args.end());
auto&& args = allocArgs(local_args, RewriterVar::SetattrType::REF_USED);
RewriterVar* generator = interp->getAttr(ASTInterpreterJitInterface::getGeneratorOffset());
auto rtn = call(false, (void*)yield, { generator, v, args, imm(local_args.size()) }, {}, local_args)
->setType(RefType::OWNED);
v->refConsumed();
return rtn;
}
void JitFragmentWriter::emitDelAttr(RewriterVar* target, BoxedString* attr) {
emitPPCall((void*)delattr, { target, imm(attr) }, 1, 512).first;
emitPPCall((void*)delattr, { target, imm(attr) }, 1, 144).first;
}
void JitFragmentWriter::emitDelGlobal(BoxedString* name) {
RewriterVar* globals = getInterp()->getAttr(ASTInterpreterJitInterface::getGlobalsOffset());
emitPPCall((void*)delGlobal, { globals, imm(name) }, 1, 512).first;
// does not get rewriten yet
// emitPPCall((void*)delGlobal, { globals, imm(name) }, 1, 512).first;
call(false, (void*)delGlobal, globals, imm(name));
}
void JitFragmentWriter::emitDelItem(RewriterVar* target, RewriterVar* slice) {
emitPPCall((void*)delitem, { target, slice }, 1, 512).first;
emitPPCall((void*)delitem, { target, slice }, 1, 256).first;
}
void JitFragmentWriter::emitDelName(InternedString name) {
......@@ -623,14 +626,16 @@ void JitFragmentWriter::emitReturn(RewriterVar* v) {
}
void JitFragmentWriter::emitSetAttr(AST_expr* node, RewriterVar* obj, BoxedString* s, STOLEN(RewriterVar*) attr) {
auto rtn = emitPPCall((void*)setattr, { obj, imm(s), attr }, 2, 512, node);
auto rtn = emitPPCall((void*)setattr, { obj, imm(s), attr }, 2, 256, node);
attr->refConsumed(rtn.second);
}
void JitFragmentWriter::emitSetBlockLocal(InternedString s, STOLEN(RewriterVar*) v) {
void JitFragmentWriter::emitSetBlockLocal(InternedString s, int vreg, STOLEN(RewriterVar*) v) {
if (LOG_BJIT_ASSEMBLY)
comment("BJIT: emitSetBlockLocal() start");
RewriterVar* prev = local_syms[s];
if (!prev)
emitSetLocal(s, vreg, false, imm(nullptr)); // clear out the vreg
local_syms[s] = v;
if (LOG_BJIT_ASSEMBLY)
comment("BJIT: emitSetBlockLocal() end");
......@@ -647,8 +652,13 @@ void JitFragmentWriter::emitSetExcInfo(RewriterVar* type, RewriterVar* value, Re
traceback->refConsumed();
}
void JitFragmentWriter::emitSetGlobal(Box* global, BoxedString* s, STOLEN(RewriterVar*) v) {
auto rtn = emitPPCall((void*)setGlobal, { imm(global), imm(s), v }, 2, 512);
void JitFragmentWriter::emitSetGlobal(BoxedString* s, STOLEN(RewriterVar*) v, bool are_globals_from_module) {
RewriterVar* globals = getInterp()->getAttr(ASTInterpreterJitInterface::getGlobalsOffset());
std::pair<RewriterVar*, RewriterAction*> rtn;
if (are_globals_from_module)
rtn = emitPPCall((void*)setattr, { globals, imm(s), v }, 1, 256);
else
rtn = emitPPCall((void*)setGlobal, { globals, imm(s), v }, 1, 256);
v->refConsumed(rtn.second);
}
......@@ -853,31 +863,38 @@ uint64_t JitFragmentWriter::asUInt(InternedString s) {
}
#endif
std::pair<RewriterVar*, RewriterAction*> JitFragmentWriter::emitPPCall(void* func_addr,
llvm::ArrayRef<RewriterVar*> args, int num_slots,
int slot_size, AST* ast_node,
TypeRecorder* type_recorder) {
std::pair<RewriterVar*, RewriterAction*>
JitFragmentWriter::emitPPCall(void* func_addr, llvm::ArrayRef<RewriterVar*> args, unsigned char num_slots,
unsigned short slot_size, AST* ast_node, TypeRecorder* type_recorder,
llvm::ArrayRef<RewriterVar*> additional_uses) {
if (LOG_BJIT_ASSEMBLY)
comment("BJIT: emitPPCall() start");
RewriterVar::SmallVector args_vec(args.begin(), args.end());
#if ENABLE_BASELINEJIT_ICS
RewriterVar* result = createNewVar();
auto args_array_ref = regionAllocArgs(args, additional_uses);
assert(args.size() < 1ul << 32);
int args_size = args.size();
RewriterVar** _args = (RewriterVar**)regionAlloc(sizeof(RewriterVar*) * args_size);
memcpy(_args, args.begin(), sizeof(RewriterVar*) * args_size);
assert(additional_uses.size() < 1 << 8);
unsigned char num_additional = additional_uses.size();
RewriterVar** args_array = args_array_ref.data();
RewriterAction* call_action = addAction([=]() {
this->_emitPPCall(result, func_addr, llvm::ArrayRef<RewriterVar*>(_args, args_size), num_slots, slot_size,
ast_node);
}, args, ActionType::NORMAL);
RewriterAction* call_action
= addAction([this, result, func_addr, ast_node, args_array, args_size, slot_size, num_slots, num_additional]() {
auto all_args = llvm::makeArrayRef(args_array, args_size + num_additional);
auto args = all_args.slice(0, args_size);
this->_emitPPCall(result, func_addr, args, num_slots, slot_size, ast_node, all_args);
}, args_array_ref, ActionType::NORMAL);
if (type_recorder) {
RewriterVar* type_recorder_var = imm(type_recorder);
RewriterVar* obj_cls_var = result->getAttr(offsetof(Box, cls));
addAction([=]() { _emitRecordType(type_recorder_var, obj_cls_var); }, { type_recorder_var, obj_cls_var },
ActionType::NORMAL);
result->refUsed();
addAction([=]() {
_emitRecordType(type_recorder_var, obj_cls_var);
result->bumpUse();
}, { type_recorder_var, obj_cls_var, result }, ActionType::NORMAL);
emitPendingCallsCheck();
return std::make_pair(result, call_action);
......@@ -952,12 +969,12 @@ Box* JitFragmentWriter::hasnextHelper(Box* b) {
return boxBool(pyston::hasnext(b));
}
Box* JitFragmentWriter::nonzeroHelper(Box* b) {
return boxBool(b->nonzeroIC());
BORROWED(Box*) JitFragmentWriter::nonzeroHelper(Box* b) {
return b->nonzeroIC() ? True : False;
}
Box* JitFragmentWriter::notHelper(Box* b) {
return boxBool(!b->nonzeroIC());
BORROWED(Box*) JitFragmentWriter::notHelper(Box* b) {
return b->nonzeroIC() ? False : True;
}
Box* JitFragmentWriter::runtimeCallHelper(Box* obj, ArgPassSpec argspec, TypeRecorder* type_recorder, Box** args,
......@@ -972,7 +989,7 @@ void JitFragmentWriter::_emitGetLocal(RewriterVar* val_var, const char* name) {
assembler::Register var_reg = val_var->getInReg();
assembler->test(var_reg, var_reg);
_setupCall(false, RewriterVar::SmallVector(), RewriterVar::SmallVector());
_setupCall(false, {});
{
assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO);
assembler->mov(assembler::Immediate((uint64_t)name), assembler::RDI);
......@@ -1001,8 +1018,10 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, ExitInfo
exit_info.exit_start = assembler->curInstPointer();
block_next->getInReg(assembler::RAX, true);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R13);
assembler->pop(assembler::R14);
assembler->pop(assembler::R15);
assembler->retq();
// make sure we have at least 'min_patch_size' of bytes available.
......@@ -1034,8 +1053,10 @@ void JitFragmentWriter::_emitOSRPoint() {
assembler->clear_reg(assembler::RAX); // = next block to execute
assembler->mov(assembler::Immediate(ASTInterpreterJitInterface::osr_dummy_value), assembler::RDX);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R13);
assembler->pop(assembler::R14);
assembler->pop(assembler::R15);
assembler->retq();
}
interp->bumpUse();
......@@ -1043,7 +1064,8 @@ void JitFragmentWriter::_emitOSRPoint() {
}
void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::ArrayRef<RewriterVar*> args,
int num_slots, int slot_size, AST* ast_node) {
int num_slots, int slot_size, AST* ast_node,
llvm::ArrayRef<RewriterVar*> vars_to_bump) {
assembler::Register r = allocReg(assembler::R11);
if (args.size() > 6) { // only 6 args can get passed in registers.
......@@ -1052,11 +1074,9 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::
assembler::Register reg = args[i]->getInReg(Location::any(), true);
assembler->mov(reg, assembler::Indirect(assembler::RSP, sizeof(void*) * (i - 6)));
}
RewriterVar::SmallVector reg_args(args.begin(), args.begin() + 6);
assert(reg_args.size() == 6);
_setupCall(false, reg_args, RewriterVar::SmallVector());
_setupCall(false, args.slice(0, 6), {}, Location::any(), vars_to_bump);
} else
_setupCall(false, args, RewriterVar::SmallVector());
_setupCall(false, args, {}, Location::any(), vars_to_bump);
if (failed)
return;
......@@ -1101,15 +1121,8 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::
result->releaseIfNoUses();
// TODO: it would be nice to be able to bumpUse on these earlier so that we could potentially avoid spilling
// the args across the call if we don't need to.
// This had to get moved to the very end of this function due to the fact that bumpUse can cause refcounting
// operations to happen.
// I'm not sure though that just moving this earlier is good enough though -- we also might need to teach setupCall
// that the args might not be needed afterwards?
// Anyway this feels like micro-optimizations at the moment and we can figure it out later.
for (RewriterVar* arg : args) {
arg->bumpUse();
for (RewriterVar* use : vars_to_bump) {
use->bumpUseLateIfNecessary();
}
}
......@@ -1137,8 +1150,10 @@ void JitFragmentWriter::_emitReturn(RewriterVar* return_val) {
return_val->getInReg(assembler::RDX, true);
assembler->clear_reg(assembler::RAX);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R13);
assembler->pop(assembler::R14);
assembler->pop(assembler::R15);
assembler->retq();
return_val->bumpUse();
}
......@@ -1156,12 +1171,12 @@ void JitFragmentWriter::_emitSideExit(STOLEN(RewriterVar*) var, RewriterVar* val
// Really, we should probably do a decref on either side post-jump.
// But the automatic refcounter doesn't support that, and since the value is either True or False,
// we can get away with doing the decref early.
// TODO: better solution is to just make NONZERO return a borrowed ref, so we don't have to decref at all.
_decref(var);
// Hax: override the automatic refcount system
assert(var->reftype == RefType::OWNED);
if (var->reftype == RefType::OWNED) {
_decref(var);
// Hax: override the automatic refcount system
var->reftype = RefType::BORROWED;
}
assert(var->num_refs_consumed == 0);
var->reftype = RefType::BORROWED;
assembler::Register var_reg = var->getInReg();
if (isLargeConstant(val)) {
......
......@@ -23,6 +23,9 @@
namespace pyston {
// passes MAP_32BIT to mmap when allocating the memory for the bjit code.
// it's nice for inspecting the generated asm because the debugger is able to show the name of called C/C++ functions
#define ENABLE_BASELINEJIT_MAP_32BIT 1
#define ENABLE_BASELINEJIT_ICS 1
class AST_stmt;
......@@ -70,8 +73,9 @@ class JitFragmentWriter;
// register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance.
//
// We use the following callee-save regs to speed up the generated code:
// r13: pointer to ASTInterpreter instance
// r14: pointer to the vregs array
// r12, r15: temporary values
// r13: pointer to ASTInterpreter instance
// r14: pointer to the vregs array
//
// To execute a specific CFGBlock one has to call:
// CFGBlock* block;
......@@ -90,8 +94,10 @@ class JitFragmentWriter;
//
// Basic layout of generated code block is:
// entry_code:
// push %r15 ; save r15
// push %r14 ; save r14
// push %r13 ; save r13
// push %r12 ; save r12
// sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size + 16 = space for two func args passed on the
// stack + 8 byte for stack alignment
// mov %rdi,%r13 ; copy the pointer to ASTInterpreter instance into r13
......@@ -107,8 +113,10 @@ class JitFragmentWriter;
// jne end_side_exit
// movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg)
// add $0x118,%rsp ; restore stack pointer
// pop %r12 ; restore r12
// pop %r13 ; restore r13
// pop %r14 ; restore r14
// pop %r15 ; restore r15
// ret ; exit to the interpreter which will interpret the specified CFGBLock*
// end_side_exit:
// ....
......@@ -120,8 +128,10 @@ class JitFragmentWriter;
// in this case 0 which means we are finished
// movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return
// add $0x118,%rsp ; restore stack pointer
// pop %r12 ; restore r12
// pop %r13 ; restore r13
// pop %r14 ; restore r14
// pop %r15 ; restore r15
// ret
//
// nth_JitFragment:
......@@ -140,8 +150,18 @@ public:
static constexpr int sp_adjustment = scratch_size + num_stack_args * 8 + 8 /* = alignment */;
private:
struct MemoryManager {
private:
uint8_t* addr;
public:
MemoryManager();
~MemoryManager();
uint8_t* get() { return addr; }
};
// the memory block contains the EH frame directly followed by the generated machine code.
std::unique_ptr<uint8_t[]> memory;
MemoryManager memory;
int entry_offset;
assembler::Assembler a;
bool is_currently_writing;
......@@ -234,7 +254,7 @@ public:
RewriterVar* emitGetBoxedLocal(BoxedString* s);
RewriterVar* emitGetBoxedLocals();
RewriterVar* emitGetClsAttr(RewriterVar* obj, BoxedString* s);
RewriterVar* emitGetGlobal(Box* global, BoxedString* s);
RewriterVar* emitGetGlobal(BoxedString* s);
RewriterVar* emitGetItem(AST_expr* node, RewriterVar* value, RewriterVar* slice);
RewriterVar* emitGetLocal(InternedString s, int vreg);
RewriterVar* emitGetPystonIter(RewriterVar* v);
......@@ -265,10 +285,10 @@ public:
void emitRaise3(RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2);
void emitReturn(RewriterVar* v);
void emitSetAttr(AST_expr* node, RewriterVar* obj, BoxedString* s, STOLEN(RewriterVar*) attr);
void emitSetBlockLocal(InternedString s, STOLEN(RewriterVar*) v);
void emitSetBlockLocal(InternedString s, int vreg, STOLEN(RewriterVar*) v);
void emitSetCurrentInst(AST_stmt* node);
void emitSetExcInfo(RewriterVar* type, RewriterVar* value, RewriterVar* traceback);
void emitSetGlobal(Box* global, BoxedString* s, STOLEN(RewriterVar*) v);
void emitSetGlobal(BoxedString* s, STOLEN(RewriterVar*) v, bool are_globals_from_module);
void emitSetItemName(BoxedString* s, RewriterVar* v);
void emitSetItem(RewriterVar* target, RewriterVar* slice, RewriterVar* value);
void emitSetLocal(InternedString s, int vreg, bool set_closure, STOLEN(RewriterVar*) v);
......@@ -296,8 +316,9 @@ private:
RewriterVar* emitCallWithAllocatedArgs(void* func_addr, const llvm::ArrayRef<RewriterVar*> args,
const llvm::ArrayRef<RewriterVar*> additional_uses);
std::pair<RewriterVar*, RewriterAction*> emitPPCall(void* func_addr, llvm::ArrayRef<RewriterVar*> args,
int num_slots, int slot_size, AST* ast_node = NULL,
TypeRecorder* type_recorder = NULL);
unsigned char num_slots, unsigned short slot_size,
AST* ast_node = NULL, TypeRecorder* type_recorder = NULL,
llvm::ArrayRef<RewriterVar*> additional_uses = {});
static void assertNameDefinedHelper(const char* id);
static Box* callattrHelper(Box* obj, BoxedString* attr, CallattrFlags flags, TypeRecorder* type_recorder,
......@@ -308,8 +329,8 @@ private:
static Box* createTupleHelper(uint64_t num, Box** data);
static Box* exceptionMatchesHelper(Box* obj, Box* cls);
static Box* hasnextHelper(Box* b);
static Box* nonzeroHelper(Box* b);
static Box* notHelper(Box* b);
static BORROWED(Box*) nonzeroHelper(Box* b);
static BORROWED(Box*) notHelper(Box* b);
static Box* runtimeCallHelper(Box* obj, ArgPassSpec argspec, TypeRecorder* type_recorder, Box** args,
std::vector<BoxedString*>* keyword_names);
......@@ -317,7 +338,7 @@ private:
void _emitJump(CFGBlock* b, RewriterVar* block_next, ExitInfo& exit_info);
void _emitOSRPoint();
void _emitPPCall(RewriterVar* result, void* func_addr, llvm::ArrayRef<RewriterVar*> args, int num_slots,
int slot_size, AST* ast_node);
int slot_size, AST* ast_node, llvm::ArrayRef<RewriterVar*> vars_to_bump);
void _emitRecordType(RewriterVar* type_recorder_var, RewriterVar* obj_cls_var);
void _emitReturn(RewriterVar* v);
void _emitSideExit(STOLEN(RewriterVar*) var, RewriterVar* val_constant, CFGBlock* next_block,
......
......@@ -593,11 +593,7 @@ public:
assert(l.stack_second_offset % 8 == 0);
b = b_ptr[l.stack_second_offset / 8];
} else if (l.type == Location::Register) {
RELEASE_ASSERT(0, "untested");
// This branch should never get hit since we shouldn't generate Register locations,
// since we don't allow allocating callee-save registers.
// If we did, this code might be right:
// b = (Box*)get_cursor_reg(cursor, l.regnum);
b = (Box*)get_cursor_reg(cursor, l.regnum);
} else {
RELEASE_ASSERT(0, "not implemented");
}
......
......@@ -4870,32 +4870,32 @@ Box* callCLFunc(FunctionMetadata* md, CallRewriteArgs* rewrite_args, int num_out
} else {
// Hacky workaround: the rewriter can only pass arguments in registers, so use this helper function
// to unpack some of the additional arguments:
llvm::SmallVector<RewriterVar*, 4> additional_uses;
RewriterVar* arg_array = rewrite_args->rewriter->allocate(4);
arg_vec.push_back(arg_array);
if (num_output_args >= 1)
if (num_output_args >= 1) {
arg_array->setAttr(0, rewrite_args->arg1, RewriterVar::SetattrType::REF_USED);
if (num_output_args >= 2)
additional_uses.push_back(rewrite_args->arg1);
}
if (num_output_args >= 2) {
arg_array->setAttr(8, rewrite_args->arg2, RewriterVar::SetattrType::REF_USED);
if (num_output_args >= 3)
additional_uses.push_back(rewrite_args->arg2);
}
if (num_output_args >= 3) {
arg_array->setAttr(16, rewrite_args->arg3, RewriterVar::SetattrType::REF_USED);
if (num_output_args >= 4)
additional_uses.push_back(rewrite_args->arg3);
}
if (num_output_args >= 4) {
arg_array->setAttr(24, rewrite_args->args, RewriterVar::SetattrType::REF_USED);
additional_uses.push_back(rewrite_args->args);
}
if (S == CXX)
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)astInterpretHelper, arg_vec)
->setType(RefType::OWNED);
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)astInterpretHelper, arg_vec, {},
additional_uses)->setType(RefType::OWNED);
else
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)astInterpretHelperCapi, arg_vec)
->setType(RefType::OWNED);
if (num_output_args >= 1)
rewrite_args->arg1->refUsed();
if (num_output_args >= 2)
rewrite_args->arg2->refUsed();
if (num_output_args >= 3)
rewrite_args->arg3->refUsed();
if (num_output_args >= 4)
rewrite_args->args->refUsed();
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)astInterpretHelperCapi, arg_vec,
{}, additional_uses)->setType(RefType::OWNED);
}
rewrite_args->out_success = true;
......@@ -5732,8 +5732,8 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit
bool neg = (op_type == AST_TYPE::IsNot);
if (rewrite_args) {
RewriterVar* cmpres = rewrite_args->lhs->cmp(neg ? AST_TYPE::NotEq : AST_TYPE::Eq, rewrite_args->rhs,
rewrite_args->destination);
RewriterVar* cmpres
= rewrite_args->lhs->cmp(neg ? AST_TYPE::NotEq : AST_TYPE::Eq, rewrite_args->rhs, assembler::RDI);
rewrite_args->out_rtn
= rewrite_args->rewriter->call(false, (void*)boxBool, cmpres)->setType(RefType::OWNED);
rewrite_args->out_success = true;
......@@ -7302,6 +7302,9 @@ extern "C" Box* getGlobal(Box* globals, BoxedString* name) {
}
extern "C" void setGlobal(Box* globals, BoxedString* name, STOLEN(Box*) value) {
static StatCounter slowpath_setglobal("slowpath_setglobal");
slowpath_setglobal.log();
if (globals->cls == attrwrapper_cls) {
globals = unwrapAttrWrapper(globals);
RELEASE_ASSERT(globals->cls == module_cls, "%s", globals->cls->tp_name);
......
# fail-if: '-n' in EXTRA_JIT_ARGS or '-O' in EXTRA_JIT_ARGS
# expected: fail
# this only works in the interpreter and not in the bjit and llvm jit
class C(object):
def next(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment