Commit f0e39c1b authored by Marius Wachtler's avatar Marius Wachtler

bjit: use a bitset for register tracking in the rewriter and use r12, r15,...

bjit: use a bitset for register tracking in the rewriter and use r12, r15, rbx, rbp in PPs and the bjit
Keeping the available registers in a bitset is more memory efficient and
also makes it easier and faster to calculate a subset of the registers.
I will soon implement the 'otherThan' functionality on top of it, which will fix the current limitation of only being able to exclude a single register
parent a0b70207
...@@ -241,7 +241,7 @@ int ICInfo::calculateSuggestedSize() { ...@@ -241,7 +241,7 @@ int ICInfo::calculateSuggestedSize() {
if (!times_rewritten) if (!times_rewritten)
return slots[0].size; return slots[0].size;
int additional_space_per_slot = 30; int additional_space_per_slot = 50;
// if there are less rewrites than slots we can give a very accurate estimate // if there are less rewrites than slots we can give a very accurate estimate
if (times_rewritten < slots.size()) { if (times_rewritten < slots.size()) {
// add up the sizes of all used slots // add up the sizes of all used slots
...@@ -316,7 +316,8 @@ static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr; ...@@ -316,7 +316,8 @@ static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr;
ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size, ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size,
llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs, assembler::GenericRegister return_register, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs, assembler::GenericRegister return_register,
TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations) TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations,
assembler::RegisterSet allocatable_registers)
: next_slot_to_try(0), : next_slot_to_try(0),
stack_info(stack_info), stack_info(stack_info),
calling_conv(calling_conv), calling_conv(calling_conv),
...@@ -326,6 +327,7 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S ...@@ -326,6 +327,7 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S
retry_in(0), retry_in(0),
retry_backoff(1), retry_backoff(1),
times_rewritten(0), times_rewritten(0),
allocatable_registers(allocatable_registers),
ic_global_decref_locations(std::move(ic_global_decref_locations)), ic_global_decref_locations(std::move(ic_global_decref_locations)),
start_addr(start_addr), start_addr(start_addr),
slowpath_rtn_addr(slowpath_rtn_addr), slowpath_rtn_addr(slowpath_rtn_addr),
...@@ -387,7 +389,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* ...@@ -387,7 +389,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
ICInfo* icinfo ICInfo* icinfo
= new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->size, ic->getCallingConvention(), = new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->size, ic->getCallingConvention(),
std::move(live_outs), return_register, ic->type_recorder, decref_info); std::move(live_outs), return_register, ic->type_recorder, decref_info, ic->allocatable_regs);
assert(!ics_by_return_addr.count(slowpath_rtn_addr)); assert(!ics_by_return_addr.count(slowpath_rtn_addr));
ics_by_return_addr[slowpath_rtn_addr] = icinfo; ics_by_return_addr[slowpath_rtn_addr] = icinfo;
......
...@@ -94,6 +94,7 @@ private: ...@@ -94,6 +94,7 @@ private:
TypeRecorder* const type_recorder; TypeRecorder* const type_recorder;
int retry_in, retry_backoff; int retry_in, retry_backoff;
int times_rewritten; int times_rewritten;
assembler::RegisterSet allocatable_registers;
DecrefInfo slowpath_decref_info; DecrefInfo slowpath_decref_info;
// This is a vector of locations which always need to get decrefed inside this IC. // This is a vector of locations which always need to get decrefed inside this IC.
...@@ -107,7 +108,8 @@ private: ...@@ -107,7 +108,8 @@ private:
public: public:
ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size, ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size,
llvm::CallingConv::ID calling_conv, LiveOutSet live_outs, assembler::GenericRegister return_register, llvm::CallingConv::ID calling_conv, LiveOutSet live_outs, assembler::GenericRegister return_register,
TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations); TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations,
assembler::RegisterSet allocatable_registers = assembler::RegisterSet::stdAllocatable());
~ICInfo(); ~ICInfo();
void* const start_addr, *const slowpath_rtn_addr, *const continue_addr; void* const start_addr, *const slowpath_rtn_addr, *const continue_addr;
...@@ -133,6 +135,8 @@ public: ...@@ -133,6 +135,8 @@ public:
int percentBackedoff() const { return retry_backoff; } int percentBackedoff() const { return retry_backoff; }
int timesRewritten() const { return times_rewritten; } int timesRewritten() const { return times_rewritten; }
assembler::RegisterSet getAllocatableRegs() const { return allocatable_registers; }
friend class ICSlotRewrite; friend class ICSlotRewrite;
static ICInfo* getICInfoForNode(AST* node); static ICInfo* getICInfoForNode(AST* node);
......
...@@ -646,10 +646,9 @@ void Rewriter::_cmp(RewriterVar* result, RewriterVar* v1, AST_TYPE::AST_TYPE cmp ...@@ -646,10 +646,9 @@ void Rewriter::_cmp(RewriterVar* result, RewriterVar* v1, AST_TYPE::AST_TYPE cmp
v1->bumpUseEarlyIfPossible(); v1->bumpUseEarlyIfPossible();
v2->bumpUseEarlyIfPossible(); v2->bumpUseEarlyIfPossible();
// sete and setne has special register requirements (can't use r8-r15) // sete and setne has special register requirements
const assembler::Register valid_registers[] = { auto set_inst_valid_registers = assembler::RAX | assembler::RBX | assembler::RCX | assembler::RDX;
assembler::RAX, assembler::RCX, assembler::RDX, assembler::RSI, assembler::RDI, auto valid_registers = set_inst_valid_registers & allocatable_regs;
};
assembler::Register newvar_reg = allocReg(dest, Location::any(), valid_registers); assembler::Register newvar_reg = allocReg(dest, Location::any(), valid_registers);
result->initializeInReg(newvar_reg); result->initializeInReg(newvar_reg);
assembler->cmp(v1_reg, v2_reg); assembler->cmp(v1_reg, v2_reg);
...@@ -849,6 +848,9 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_ ...@@ -849,6 +848,9 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_
Location l(*locations.begin()); Location l(*locations.begin());
assembler::Register reg = rewriter->allocReg(dest, otherThan); assembler::Register reg = rewriter->allocReg(dest, otherThan);
if (rewriter->failed)
return reg;
assert(rewriter->vars_by_location.count(reg) == 0); assert(rewriter->vars_by_location.count(reg) == 0);
if (l.type == Location::Scratch || l.type == Location::Stack) { if (l.type == Location::Scratch || l.type == Location::Stack) {
...@@ -1979,9 +1981,10 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) { ...@@ -1979,9 +1981,10 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) {
} }
// First, try to spill into a callee-save register: // First, try to spill into a callee-save register:
for (assembler::Register new_reg : allocatable_regs) { auto callee_save_allocatable_regs = allocatable_regs & assembler::RegisterSet::getCalleeSave();
if (!new_reg.isCalleeSave()) for (assembler::Register new_reg : callee_save_allocatable_regs) {
continue; assert(new_reg.isCalleeSave());
if (vars_by_location.count(new_reg)) if (vars_by_location.count(new_reg))
continue; continue;
if (Location(new_reg) == preserve) if (Location(new_reg) == preserve)
...@@ -2023,8 +2026,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) { ...@@ -2023,8 +2026,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
return allocReg(dest, otherThan, allocatable_regs); return allocReg(dest, otherThan, allocatable_regs);
} }
assembler::Register Rewriter::allocReg(Location dest, Location otherThan, assembler::Register Rewriter::allocReg(Location dest, Location otherThan, assembler::RegisterSet valid_registers) {
llvm::ArrayRef<assembler::Register> valid_registers) {
assertPhaseEmitting(); assertPhaseEmitting();
if (dest.type == Location::AnyReg) { if (dest.type == Location::AnyReg) {
...@@ -2063,7 +2065,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan, ...@@ -2063,7 +2065,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan,
assert(failed || vars_by_location.count(best_reg) == 0); assert(failed || vars_by_location.count(best_reg) == 0);
return best_reg; return best_reg;
} else if (dest.type == Location::Register) { } else if (dest.type == Location::Register) {
assert(std::find(valid_registers.begin(), valid_registers.end(), dest.asRegister()) != valid_registers.end()); assert(valid_registers.isInside(dest.asRegister()));
assembler::Register reg(dest.regnum); assembler::Register reg(dest.regnum);
if (vars_by_location.count(reg)) { if (vars_by_location.count(reg)) {
...@@ -2220,7 +2222,7 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const L ...@@ -2220,7 +2222,7 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const L
marked_inside_ic(false), marked_inside_ic(false),
done_guarding(false), done_guarding(false),
last_guard_action(-1), last_guard_action(-1),
allocatable_regs(std_allocatable_regs) { allocatable_regs(this->rewrite->getICInfo()->getAllocatableRegs()) {
initPhaseCollecting(); initPhaseCollecting();
finished = false; finished = false;
......
...@@ -516,8 +516,7 @@ protected: ...@@ -516,8 +516,7 @@ protected:
// Allocates a register. dest must be of type Register or AnyReg // Allocates a register. dest must be of type Register or AnyReg
// If otherThan is a register, guaranteed to not use that register. // If otherThan is a register, guaranteed to not use that register.
assembler::Register allocReg(Location dest, Location otherThan = Location::any()); assembler::Register allocReg(Location dest, Location otherThan = Location::any());
assembler::Register allocReg(Location dest, Location otherThan, assembler::Register allocReg(Location dest, Location otherThan, assembler::RegisterSet valid_registers);
llvm::ArrayRef<assembler::Register> valid_registers);
assembler::XMMRegister allocXMMReg(Location dest, Location otherThan = Location::any()); assembler::XMMRegister allocXMMReg(Location dest, Location otherThan = Location::any());
// Allocates an 8-byte region in the scratch space // Allocates an 8-byte region in the scratch space
Location allocScratch(); Location allocScratch();
...@@ -609,7 +608,7 @@ protected: ...@@ -609,7 +608,7 @@ protected:
#endif #endif
} }
llvm::ArrayRef<assembler::Register> allocatable_regs; assembler::RegisterSet allocatable_regs;
public: public:
// This should be called exactly once for each argument // This should be called exactly once for each argument
......
...@@ -35,6 +35,7 @@ namespace assembler { ...@@ -35,6 +35,7 @@ namespace assembler {
class Assembler; class Assembler;
struct RegisterSet;
struct Register { struct Register {
int regnum; int regnum;
...@@ -52,27 +53,80 @@ struct Register { ...@@ -52,27 +53,80 @@ struct Register {
static Register fromDwarf(int dwarf_regnum); static Register fromDwarf(int dwarf_regnum);
static constexpr int numRegs() { return 16; } static constexpr int numRegs() { return 16; }
constexpr RegisterSet operator|(Register b) const;
}; };
const Register RAX(0); constexpr Register RAX(0);
const Register RCX(1); constexpr Register RCX(1);
const Register RDX(2); constexpr Register RDX(2);
const Register RBX(3); constexpr Register RBX(3);
const Register RSP(4); constexpr Register RSP(4);
const Register RBP(5); constexpr Register RBP(5);
const Register RSI(6); constexpr Register RSI(6);
const Register RDI(7); constexpr Register RDI(7);
const Register R8(8); constexpr Register R8(8);
const Register R9(9); constexpr Register R9(9);
const Register R10(10); constexpr Register R10(10);
const Register R11(11); constexpr Register R11(11);
const Register R12(12); constexpr Register R12(12);
const Register R13(13); constexpr Register R13(13);
const Register R14(14); constexpr Register R14(14);
const Register R15(15); constexpr Register R15(15);
// A set of general purpose registers stored as a bitmask:
// bit N of 'regs' is set when the register with regnum N is a member of the set.
struct RegisterSet {
    typedef unsigned int Regs;
    Regs regs;

    // Wraps an already computed bitmask.
    constexpr explicit RegisterSet(Regs regs) : regs(regs) {}
    // Deliberately not explicit: expressions like 'RBX | RSP | RBP' rely on the trailing
    // Register operands being converted into single-element sets.
    constexpr RegisterSet(Register reg) : regs(1ul << reg.regnum) {}

    // Registers which are treated as callee-save.
    static constexpr RegisterSet getCalleeSave() { return RBX | RSP | RBP | R12 | R13 | R14 | R15; }
    // Default set of registers handed out when no other set is specified.
    static constexpr RegisterSet stdAllocatable() { return RAX | RCX | RDX | RDI | RSI | R8 | R9 | R10 | R11; }

    // Returns true if 'reg' is a member of this set.
    bool isInside(Register reg) const { return (regs & (1ul << reg.regnum)) != 0; }
    // Returns true if no register is a member of this set.
    bool empty() const { return !regs; }

    // Forward iterator which visits the members of the set in ascending regnum order.
    // It only keeps a reference to the set, so the set has to outlive the iterator.
    class iterator {
    public:
        const RegisterSet& set;
        int i; // regnum of the current element, Register::numRegs() marks the end

        iterator(const RegisterSet& set, int i) : set(set), i(i) {}

        // Moves on to the next register contained in the set (or to the end position).
        iterator& operator++() {
            const int limit = Register::numRegs();
            for (++i; i < limit; ++i) {
                if (set.isInside(Register(i)))
                    break;
            }
            // Incrementing an iterator which already is at the end would move 'i' past the limit,
            // clamp it so that it still compares equal to end().
            if (i > limit)
                i = limit;
            return *this;
        }

        // Iterators are compared by position only.
        bool operator==(const iterator& rhs) const { return i == rhs.i; }
        bool operator!=(const iterator& rhs) const { return i != rhs.i; }

        Register operator*() { return Register(i); }
    };

    // Iterator to the member with the lowest regnum, or end() if the set is empty.
    iterator begin() const {
        // __builtin_ctz returns the index of the lowest set bit but must not be called with 0,
        // that's why the empty set is handled separately.
        return empty() ? end() : iterator(*this, __builtin_ctz(regs));
    }
    iterator end() const { return iterator(*this, Register::numRegs()); }

    // Set union and intersection.
    constexpr RegisterSet operator|(RegisterSet b) const { return RegisterSet(regs | b.regs); }
    constexpr RegisterSet operator&(RegisterSet b) const { return RegisterSet(regs & b.regs); }
    void operator|=(RegisterSet b) { regs = regs | b.regs; }
    void operator&=(RegisterSet b) { regs = regs & b.regs; }
};
// Combines two single registers into the set which contains both of them.
// Defined after RegisterSet because only a forward declaration of it is available
// at the point where Register declares this operator.
constexpr RegisterSet Register::operator|(Register b) const {
    return RegisterSet(RegisterSet(*this).regs | RegisterSet(b).regs);
}
inline bool Register::isCalleeSave() { inline bool Register::isCalleeSave() {
return *this == RBX || *this == RSP || *this == RBP || regnum >= 12; return RegisterSet::getCalleeSave().isInside(*this);
} }
struct Indirect { struct Indirect {
......
...@@ -43,6 +43,8 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations; ...@@ -43,6 +43,8 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations;
// asm volatile ("" ::: "r14"); // asm volatile ("" ::: "r14");
// asm volatile ("" ::: "r13"); // asm volatile ("" ::: "r13");
// asm volatile ("" ::: "r12"); // asm volatile ("" ::: "r12");
// asm volatile ("" ::: "rbx");
// asm volatile ("" ::: "rbp");
// char scratch[256+16]; // char scratch[256+16];
// foo(scratch); // foo(scratch);
// } // }
...@@ -50,15 +52,16 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations; ...@@ -50,15 +52,16 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations;
// It omits the frame pointer but saves r12, r13, r14 and r15 // It omits the frame pointer but saves r12, r13, r14 and r15
// use 'objdump -s -j .eh_frame <obj.file>' to dump it // use 'objdump -s -j .eh_frame <obj.file>' to dump it
const unsigned char eh_info[] const unsigned char eh_info[]
= { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, = { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, 0x1b,
0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42, 0x0e, 0x18, 0x42, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x41, 0x0e, 0x10, 0x42, 0x0e, 0x18, 0x42, 0x0e, 0x20, 0x42,
0x0e, 0x20, 0x42, 0x0e, 0x28, 0x47, 0x0e, 0xc0, 0x02, 0x8c, 0x05, 0x8d, 0x04, 0x8e, 0x03, 0x8f, 0x0e, 0x28, 0x42, 0x0e, 0x30, 0x41, 0x0e, 0x38, 0x47, 0x0e, 0xd0, 0x02, 0x83, 0x07, 0x8c, 0x06, 0x8d,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; 0x05, 0x8e, 0x04, 0x8f, 0x03, 0x86, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!"); static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!");
static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!"); static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!");
constexpr int code_size = JitCodeBlock::memory_size - sizeof(eh_info); constexpr int code_size = JitCodeBlock::memory_size - sizeof(eh_info);
constexpr assembler::RegisterSet JitCodeBlock::additional_regs;
JitCodeBlock::MemoryManager::MemoryManager() { JitCodeBlock::MemoryManager::MemoryManager() {
int protection = PROT_READ | PROT_WRITE | PROT_EXEC; int protection = PROT_READ | PROT_WRITE | PROT_EXEC;
...@@ -86,10 +89,12 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name) ...@@ -86,10 +89,12 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
uint8_t* code = a.curInstPointer(); uint8_t* code = a.curInstPointer();
// emit prolog // emit prolog
a.push(assembler::RBP);
a.push(assembler::R15); a.push(assembler::R15);
a.push(assembler::R14); a.push(assembler::R14);
a.push(assembler::R13); a.push(assembler::R13);
a.push(assembler::R12); a.push(assembler::R12);
a.push(assembler::RBX);
static_assert(sp_adjustment % 16 == 8, "stack isn't aligned"); static_assert(sp_adjustment % 16 == 8, "stack isn't aligned");
a.sub(assembler::Immediate(sp_adjustment), assembler::RSP); a.sub(assembler::Immediate(sp_adjustment), assembler::RSP);
a.mov(assembler::RDI, assembler::R13); // interpreter pointer a.mov(assembler::RDI, assembler::R13); // interpreter pointer
...@@ -131,9 +136,10 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in ...@@ -131,9 +136,10 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in
void* fragment_start = a.curInstPointer() - patch_jump_offset; void* fragment_start = a.curInstPointer() - patch_jump_offset;
long fragment_offset = a.bytesWritten() - patch_jump_offset; long fragment_offset = a.bytesWritten() - patch_jump_offset;
long bytes_left = a.bytesLeft() + patch_jump_offset; long bytes_left = a.bytesLeft() + patch_jump_offset;
constexpr assembler::RegisterSet bjit_allocatable_regs = assembler::RegisterSet::stdAllocatable() | additional_regs;
std::unique_ptr<ICInfo> ic_info(new ICInfo(fragment_start, nullptr, nullptr, stack_info, bytes_left, std::unique_ptr<ICInfo> ic_info(new ICInfo(fragment_start, nullptr, nullptr, stack_info, bytes_left,
llvm::CallingConv::C, live_outs, assembler::RAX, 0, llvm::CallingConv::C, live_outs, assembler::RAX, 0,
std::vector<Location>())); std::vector<Location>(), bjit_allocatable_regs));
std::unique_ptr<ICSlotRewrite> rewrite = ic_info->startRewrite(""); std::unique_ptr<ICSlotRewrite> rewrite = ic_info->startRewrite("");
return std::unique_ptr<JitFragmentWriter>(new JitFragmentWriter( return std::unique_ptr<JitFragmentWriter>(new JitFragmentWriter(
...@@ -156,13 +162,6 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping ...@@ -156,13 +162,6 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping
ic_info.appendDecrefInfosTo(decref_infos); ic_info.appendDecrefInfosTo(decref_infos);
} }
static const assembler::Register bjit_allocatable_regs[]
= { assembler::RAX, assembler::RCX, assembler::RDX,
// no RSP
// no RBP
assembler::RDI, assembler::RSI, assembler::R8, assembler::R9,
assembler::R10, assembler::R11, assembler::R12, assembler::R15 };
JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info, JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info,
std::unique_ptr<ICSlotRewrite> rewrite, int code_offset, int num_bytes_overlapping, std::unique_ptr<ICSlotRewrite> rewrite, int code_offset, int num_bytes_overlapping,
void* entry_code, JitCodeBlock& code_block) void* entry_code, JitCodeBlock& code_block)
...@@ -175,7 +174,6 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic ...@@ -175,7 +174,6 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic
code_block(code_block), code_block(code_block),
interp(0), interp(0),
ic_info(std::move(ic_info)) { ic_info(std::move(ic_info)) {
allocatable_regs = bjit_allocatable_regs;
added_changing_action = true; added_changing_action = true;
...@@ -1062,10 +1060,12 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, ExitInfo ...@@ -1062,10 +1060,12 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, ExitInfo
exit_info.exit_start = assembler->curInstPointer(); exit_info.exit_start = assembler->curInstPointer();
block_next->getInReg(assembler::RAX, true); block_next->getInReg(assembler::RAX, true);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::RBX);
assembler->pop(assembler::R12); assembler->pop(assembler::R12);
assembler->pop(assembler::R13); assembler->pop(assembler::R13);
assembler->pop(assembler::R14); assembler->pop(assembler::R14);
assembler->pop(assembler::R15); assembler->pop(assembler::R15);
assembler->pop(assembler::RBP);
assembler->retq(); assembler->retq();
// make sure we have at least 'min_patch_size' of bytes available. // make sure we have at least 'min_patch_size' of bytes available.
...@@ -1097,10 +1097,12 @@ void JitFragmentWriter::_emitOSRPoint() { ...@@ -1097,10 +1097,12 @@ void JitFragmentWriter::_emitOSRPoint() {
assembler->clear_reg(assembler::RAX); // = next block to execute assembler->clear_reg(assembler::RAX); // = next block to execute
assembler->mov(assembler::Immediate(ASTInterpreterJitInterface::osr_dummy_value), assembler::RDX); assembler->mov(assembler::Immediate(ASTInterpreterJitInterface::osr_dummy_value), assembler::RDX);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::RBX);
assembler->pop(assembler::R12); assembler->pop(assembler::R12);
assembler->pop(assembler::R13); assembler->pop(assembler::R13);
assembler->pop(assembler::R14); assembler->pop(assembler::R14);
assembler->pop(assembler::R15); assembler->pop(assembler::R15);
assembler->pop(assembler::RBP);
assembler->retq(); assembler->retq();
} }
interp->bumpUse(); interp->bumpUse();
...@@ -1134,7 +1136,14 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm:: ...@@ -1134,7 +1136,14 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::
uint8_t* pp_end = rewrite->getSlotStart() + assembler->bytesWritten(); uint8_t* pp_end = rewrite->getSlotStart() + assembler->bytesWritten();
assert(assembler->hasFailed() || (pp_start + pp_size + call_size == pp_end)); assert(assembler->hasFailed() || (pp_start + pp_size + call_size == pp_end));
std::unique_ptr<ICSetupInfo> setup_info(ICSetupInfo::initialize(true, pp_size, ICSetupInfo::Generic, NULL)); assembler::RegisterSet regs = assembler::RegisterSet::stdAllocatable();
for (assembler::Register reg : JitCodeBlock::additional_regs) {
if (vars_by_location.count(reg) == 0)
regs |= assembler::RegisterSet(reg);
}
std::unique_ptr<ICSetupInfo> setup_info(ICSetupInfo::initialize(true, pp_size, ICSetupInfo::Generic, NULL, regs));
// calculate available scratch space // calculate available scratch space
int pp_scratch_size = 0; int pp_scratch_size = 0;
...@@ -1190,10 +1199,12 @@ void JitFragmentWriter::_emitReturn(RewriterVar* return_val) { ...@@ -1190,10 +1199,12 @@ void JitFragmentWriter::_emitReturn(RewriterVar* return_val) {
return_val->getInReg(assembler::RDX, true); return_val->getInReg(assembler::RDX, true);
assembler->clear_reg(assembler::RAX); assembler->clear_reg(assembler::RAX);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::RBX);
assembler->pop(assembler::R12); assembler->pop(assembler::R12);
assembler->pop(assembler::R13); assembler->pop(assembler::R13);
assembler->pop(assembler::R14); assembler->pop(assembler::R14);
assembler->pop(assembler::R15); assembler->pop(assembler::R15);
assembler->pop(assembler::RBP);
assembler->retq(); assembler->retq();
return_val->bumpUse(); return_val->bumpUse();
} }
......
...@@ -73,9 +73,9 @@ class JitFragmentWriter; ...@@ -73,9 +73,9 @@ class JitFragmentWriter;
// register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance. // register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance.
// //
// We use the following callee-save regs to speed up the generated code: // We use the following callee-save regs to speed up the generated code:
// r12, r15: temporary values // rbx, rbp, r12, r15: temporary values
// r13: pointer to ASTInterpreter instance // r13 : pointer to ASTInterpreter instance
// r14: pointer to the vregs array // r14 : pointer to the vregs array
// //
// To execute a specific CFGBlock one has to call: // To execute a specific CFGBlock one has to call:
// CFGBlock* block; // CFGBlock* block;
...@@ -94,10 +94,12 @@ class JitFragmentWriter; ...@@ -94,10 +94,12 @@ class JitFragmentWriter;
// //
// Basic layout of generated code block is: // Basic layout of generated code block is:
// entry_code: // entry_code:
// push %rbp ; save rbp
// push %r15 ; save r15 // push %r15 ; save r15
// push %r14 ; save r14 // push %r14 ; save r14
// push %r13 ; save r13 // push %r13 ; save r13
// push %r12 ; save r12 // push %r12 ; save r12
// push %rbx ; save rbx
// sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size + 16 = space for two func args passed on the // sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size + 16 = space for two func args passed on the
// stack + 8 byte for stack alignment // stack + 8 byte for stack alignment
// mov %rdi,%r13 ; copy the pointer to ASTInterpreter instance into r13 // mov %rdi,%r13 ; copy the pointer to ASTInterpreter instance into r13
...@@ -113,10 +115,12 @@ class JitFragmentWriter; ...@@ -113,10 +115,12 @@ class JitFragmentWriter;
// jne end_side_exit // jne end_side_exit
// movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg) // movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg)
// add $0x118,%rsp ; restore stack pointer // add $0x118,%rsp ; restore stack pointer
// pop %rbx ; restore rbx
// pop %r12 ; restore r12 // pop %r12 ; restore r12
// pop %r13 ; restore r13 // pop %r13 ; restore r13
// pop %r14 ; restore r14 // pop %r14 ; restore r14
// pop %r15 ; restore r15 // pop %r15 ; restore r15
// pop %rbp ; restore rbp
// ret ; exit to the interpreter which will interpret the specified CFGBLock* // ret ; exit to the interpreter which will interpret the specified CFGBLock*
// end_side_exit: // end_side_exit:
// .... // ....
...@@ -128,10 +132,12 @@ class JitFragmentWriter; ...@@ -128,10 +132,12 @@ class JitFragmentWriter;
// in this case 0 which means we are finished // in this case 0 which means we are finished
// movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return // movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return
// add $0x118,%rsp ; restore stack pointer // add $0x118,%rsp ; restore stack pointer
// pop %rbx ; restore rbx
// pop %r12 ; restore r12 // pop %r12 ; restore r12
// pop %r13 ; restore r13 // pop %r13 ; restore r13
// pop %r14 ; restore r14 // pop %r14 ; restore r14
// pop %r15 ; restore r15 // pop %r15 ; restore r15
// pop %rbp ; restore rbp
// ret // ret
// //
// nth_JitFragment: // nth_JitFragment:
...@@ -148,6 +154,8 @@ public: ...@@ -148,6 +154,8 @@ public:
// scratch size + space for passing additional args on the stack without having to adjust the SP when calling // scratch size + space for passing additional args on the stack without having to adjust the SP when calling
// functions with more than 6 args. // functions with more than 6 args.
static constexpr int sp_adjustment = scratch_size + num_stack_args * 8 + 8 /* = alignment */; static constexpr int sp_adjustment = scratch_size + num_stack_args * 8 + 8 /* = alignment */;
static constexpr assembler::RegisterSet additional_regs = assembler::RBX | assembler::RBP | assembler::R12
| assembler::R15;
private: private:
struct MemoryManager { struct MemoryManager {
......
...@@ -44,8 +44,10 @@ int ICSetupInfo::totalSize() const { ...@@ -44,8 +44,10 @@ int ICSetupInfo::totalSize() const {
static std::vector<std::pair<PatchpointInfo*, void* /* addr of func to call */>> new_patchpoints; static std::vector<std::pair<PatchpointInfo*, void* /* addr of func to call */>> new_patchpoints;
ICSetupInfo* ICSetupInfo::initialize(bool has_return_value, int size, ICType type, TypeRecorder* type_recorder) { ICSetupInfo* ICSetupInfo::initialize(bool has_return_value, int size, ICType type, TypeRecorder* type_recorder,
ICSetupInfo* rtn = new ICSetupInfo(type, size, has_return_value, type_recorder); assembler::RegisterSet allocatable_regs) {
ICSetupInfo* rtn = new ICSetupInfo(type, size, has_return_value, type_recorder, allocatable_regs);
// We use size == CALL_ONLY_SIZE to imply that the call isn't patchable // We use size == CALL_ONLY_SIZE to imply that the call isn't patchable
assert(rtn->totalSize() > CALL_ONLY_SIZE); assert(rtn->totalSize() > CALL_ONLY_SIZE);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallingConv.h"
#include "asm_writing/types.h"
#include "codegen/stackmaps.h" #include "codegen/stackmaps.h"
#include "core/common.h" #include "core/common.h"
...@@ -64,14 +65,20 @@ public: ...@@ -64,14 +65,20 @@ public:
}; };
private: private:
ICSetupInfo(ICType type, int size, bool has_return_value, TypeRecorder* type_recorder) ICSetupInfo(ICType type, int size, bool has_return_value, TypeRecorder* type_recorder,
: type(type), size(size), has_return_value(has_return_value), type_recorder(type_recorder) {} assembler::RegisterSet allocatable_regs)
: type(type),
size(size),
has_return_value(has_return_value),
type_recorder(type_recorder),
allocatable_regs(allocatable_regs) {}
public: public:
const ICType type; const ICType type;
const int size; const int size;
const bool has_return_value; const bool has_return_value;
TypeRecorder* const type_recorder; TypeRecorder* const type_recorder;
assembler::RegisterSet allocatable_regs;
int totalSize() const; int totalSize() const;
bool hasReturnValue() const { return has_return_value; } bool hasReturnValue() const { return has_return_value; }
...@@ -90,7 +97,8 @@ public: ...@@ -90,7 +97,8 @@ public:
return llvm::CallingConv::C; return llvm::CallingConv::C;
} }
static ICSetupInfo* initialize(bool has_return_value, int size, ICType type, TypeRecorder* type_recorder); static ICSetupInfo* initialize(bool has_return_value, int size, ICType type, TypeRecorder* type_recorder,
assembler::RegisterSet allocatable_regs = assembler::RegisterSet::stdAllocatable());
}; };
struct PatchpointInfo { struct PatchpointInfo {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment