Commit 06274389 authored by Kevin Modzelewski

Change some of the low-level initialization of patchpoints

This will make it easier to initialize the non-IC patchpoints that
are coming up.
parent dbc15587
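
For context: every patchpoint touched here begins with the same fixed 13-byte call sequence, mov $imm64, %r11; callq *%r11 (bytes 49 bb <imm64> 41 ff d3), which the asserts in the hunks below check byte-for-byte. A minimal sketch of decoding the slowpath target out of that sequence, mirroring what the new extractSlowpathFunc() does (the standalone function name is just for illustration):

    #include <cassert>
    #include <cstdint>

    // Sketch only: recover the slowpath target from the fixed 13-byte
    // patchpoint call sequence ("mov $imm64, %r11; callq *%r11").
    static void* decodeSlowpathTarget(uint8_t* pp_addr) {
        assert(pp_addr[0] == 0x49 && pp_addr[1] == 0xbb);                          // mov $imm64, %r11
        assert(pp_addr[10] == 0x41 && pp_addr[11] == 0xff && pp_addr[12] == 0xd3); // callq *%r11
        return *(void**)&pp_addr[2]; // bytes 2..9 hold the 8-byte target address
    }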
@@ -17,6 +17,7 @@
 #include <cstring>
 
 #include "core/common.h"
+#include "core/options.h"
 
 namespace pyston {
 namespace assembler {
@@ -31,14 +32,14 @@ void Register::dump() const {
 const int dwarf_to_gp[] = {
     // http://www.x86-64.org/documentation/abi.pdf#page=57
-    0,  // 0
-    2,  // 1
+    0,  // 0 -> rax
+    2,  // 1 -> rdx
     1,  // 2 -> rcx
     3,  // 3 -> rbx
-    6,  // 4
-    7,  // 5
+    6,  // 4 -> rsi
+    7,  // 5 -> rdi
     5,  // 6 -> rbp
-    4,  // 7
+    4,  // 7 -> rsp
     8,  // 8 -> r8
     9,  // 9 -> r9
     10, // 10 -> r10
@@ -331,7 +332,6 @@ void Assembler::movsd(XMMRegister src, Indirect dest) {
     int dest_idx = dest.base.regnum;
     if (src_idx >= 8) {
-        trap();
         rex |= REX_R;
         src_idx -= 8;
     }
@@ -680,21 +680,21 @@ uint8_t* Assembler::emitCall(void* ptr, Register scratch) {
     return addr;
 }
 
-void Assembler::emitBatchPush(StackInfo stack_info, const std::vector<GenericRegister>& to_push) {
-    assert(stack_info.has_scratch);
+void Assembler::emitBatchPush(int scratch_rbp_offset, int scratch_size, const std::vector<GenericRegister>& to_push) {
     int offset = 0;
     for (const GenericRegister& r : to_push) {
-        assert(stack_info.scratch_bytes >= offset + 8);
-        Indirect next_slot(RBP, offset + stack_info.scratch_rbp_offset);
+        Indirect next_slot(RBP, offset + scratch_rbp_offset);
         if (r.type == GenericRegister::GP) {
             Register gp = r.gp;
             assert(gp.regnum >= 0 && gp.regnum < 16);
+            assert(scratch_size >= offset + 8);
             mov(gp, next_slot);
             offset += 8;
         } else if (r.type == GenericRegister::XMM) {
             XMMRegister reg = r.xmm;
+            assert(scratch_size >= offset + 8);
             movsd(reg, next_slot);
             offset += 8;
         } else {
@@ -703,13 +703,12 @@ void Assembler::emitBatchPush(StackInfo stack_info, const std::vector<GenericReg
     }
 }
 
-void Assembler::emitBatchPop(StackInfo stack_info, const std::vector<GenericRegister>& to_push) {
-    assert(stack_info.has_scratch);
+void Assembler::emitBatchPop(int scratch_rbp_offset, int scratch_size, const std::vector<GenericRegister>& to_push) {
     int offset = 0;
     for (const GenericRegister& r : to_push) {
-        assert(stack_info.scratch_bytes >= offset + 8);
-        Indirect next_slot(RBP, offset + stack_info.scratch_rbp_offset);
+        assert(scratch_size >= offset + 8);
+        Indirect next_slot(RBP, offset + scratch_rbp_offset);
         if (r.type == GenericRegister::GP) {
             Register gp = r.gp;
@@ -743,71 +742,5 @@ void Assembler::emitAnnotation(int num) {
     cmp(RAX, Immediate(num));
     nop();
 }
 
-uint8_t* initializePatchpoint2(uint8_t* start_addr, uint8_t* slowpath_start, uint8_t* end_addr, StackInfo stack_info,
-                               const std::unordered_set<int>& live_outs) {
-    assert(start_addr < slowpath_start);
-
-    static const int INITIAL_CALL_SIZE = 13;
-    assert(end_addr > slowpath_start + INITIAL_CALL_SIZE);
-
-#ifndef NDEBUG
-    // if (VERBOSITY()) printf("initializing patchpoint at %p - %p\n", addr, addr + size);
-    // for (int i = 0; i < size; i++) {
-    //     printf("%02x ", *(addr + i));
-    //}
-    // printf("\n");
-
-    // Check the exact form of the patchpoint call.
-    // It's important to make sure that the only live registers
-    // are the ones that are used as arguments; ie it wouldn't
-    // matter if the call happened on %r10 instead of %r11,
-    // but it would matter if there wasn't a mov immediately before
-    // the call, since then %r11 would be live and we couldn't
-    // use it as a temporary.
-
-    // mov $imm, %r11:
-    ASSERT(start_addr[0] == 0x49, "%x", start_addr[0]);
-    assert(start_addr[1] == 0xbb);
-    // 8 bytes of the addr
-    // callq *%r11:
-    assert(start_addr[10] == 0x41);
-    assert(start_addr[11] == 0xff);
-    assert(start_addr[12] == 0xd3);
-
-    int i = INITIAL_CALL_SIZE;
-    while (*(start_addr + i) == 0x66 || *(start_addr + i) == 0x0f || *(start_addr + i) == 0x2e)
-        i++;
-    assert(*(start_addr + i) == 0x90 || *(start_addr + i) == 0x1f);
-#endif
-
-    void* call_addr = *(void**)&start_addr[2];
-
-    Assembler(start_addr, slowpath_start - start_addr).fillWithNops();
-
-    std::vector<GenericRegister> regs_to_spill;
-    for (int dwarf_regnum : live_outs) {
-        GenericRegister ru = GenericRegister::fromDwarf(dwarf_regnum);
-
-        if (ru.type == GenericRegister::GP) {
-            if (ru.gp == RSP || ru.gp.isCalleeSave())
-                continue;
-        }
-
-        regs_to_spill.push_back(ru);
-    }
-
-    Assembler assem(slowpath_start, end_addr - slowpath_start);
-    // if (regs_to_spill.size())
-    //     assem.trap();
-    assem.emitBatchPush(stack_info, regs_to_spill);
-    uint8_t* rtn = assem.emitCall(call_addr, R11);
-    assem.emitBatchPop(stack_info, regs_to_spill);
-    assem.fillWithNops();
-
-    return rtn;
-}
-
 }
 }
@@ -115,12 +115,13 @@ public:
     // Macros:
     uint8_t* emitCall(void* func_addr, Register scratch);
-    void emitBatchPop(StackInfo stack_info, const std::vector<GenericRegister>& to_push);
-    void emitBatchPush(StackInfo stack_info, const std::vector<GenericRegister>& to_push);
+    void emitBatchPop(int scratch_rbp_offset, int scratch_size, const std::vector<GenericRegister>& to_push);
+    void emitBatchPush(int scratch_rbp_offset, int scratch_size, const std::vector<GenericRegister>& to_push);
     void fillWithNops();
     void fillWithNopsExcept(int bytes);
     void emitAnnotation(int num);
 
+    uint8_t* curInstPointer() { return addr; }
+
     bool isExactlyFull() { return addr == end_addr; }
 };
...
@@ -122,13 +122,11 @@ int ICSlotRewrite::getFuncStackSize() {
 }
 
 int ICSlotRewrite::getScratchRbpOffset() {
-    assert(ic->stack_info.has_scratch);
     assert(ic->stack_info.scratch_bytes);
     return ic->stack_info.scratch_rbp_offset;
 }
 
 int ICSlotRewrite::getScratchBytes() {
-    assert(ic->stack_info.has_scratch);
     assert(ic->stack_info.scratch_bytes);
     return ic->stack_info.scratch_bytes;
 }
@@ -198,18 +196,18 @@ ICInfo::ICInfo(void* start_addr, void* continue_addr, StackInfo stack_info, int
 }
 
 static std::unordered_map<void*, ICInfo*> ics_by_return_addr;
 
-void registerCompiledPatchpoint(CompiledFunction* cf, uint8_t* start_addr, const ICSetupInfo* pp, StackInfo stack_info,
-                                std::unordered_set<int> live_outs) {
-    int size = pp->totalSize();
-    uint8_t* end_addr = start_addr + size;
-    uint8_t* slowpath_addr = end_addr;
-
-    uint8_t* rtn_addr;
+void registerCompiledPatchpoint(CompiledFunction* cf, uint8_t* start_addr, uint8_t* slowpath_start_addr,
+                                uint8_t* continue_addr, uint8_t* slowpath_rtn_addr, const ICSetupInfo* ic,
+                                StackInfo stack_info, std::unordered_set<int> live_outs) {
+    assert(slowpath_start_addr - start_addr >= ic->num_slots * ic->slot_size);
+    assert(slowpath_rtn_addr > slowpath_start_addr);
+    assert(slowpath_rtn_addr <= start_addr + ic->totalSize());
 
     assembler::GenericRegister return_register;
-    assert(pp->getCallingConvention() == llvm::CallingConv::C
-           || pp->getCallingConvention() == llvm::CallingConv::PreserveAll);
-    if (pp->hasReturnValue()) {
+    assert(ic->getCallingConvention() == llvm::CallingConv::C
+           || ic->getCallingConvention() == llvm::CallingConv::PreserveAll);
+
+    if (ic->hasReturnValue()) {
         static const int DWARF_RAX = 0;
         // It's possible that the return value doesn't get used, in which case
         // we can avoid copying back into RAX at the end
@@ -222,46 +220,29 @@ void registerCompiledPatchpoint(CompiledFunction* cf, uint8_t* start_addr, const
         return_register = assembler::RAX;
     }
 
-    if (pp->getCallingConvention() != llvm::CallingConv::C) {
-        uint8_t* slowpath_start = start_addr + pp->num_slots * pp->slot_size;
-        rtn_addr = initializePatchpoint2(start_addr, slowpath_start, (uint8_t*)end_addr, stack_info, live_outs);
-    } else {
-        // for (int regnum : live_outs) {
-        //// LLVM has a bug where it incorrectly determines the set of liveouts;
-        //// so far it only seems to add additional ones to the set, which should
-        //// hopefully be safe.
-        //// Otherwise, I'd like to test here that it's only the registers
-        //// that we'd expect to be saved...
-        // ASSERT(regnum == 0 || regnum == 3 || regnum == 6 || regnum == 12 || regnum == 13 || regnum == 14 || regnum ==
-        // 15 || regnum == 7, "%d", regnum);
-        //}
-        initializePatchpoint(start_addr, size);
-        rtn_addr = slowpath_addr;
-    }
-
     // we can let the user just slide down the nop section, but instead
     // emit jumps to the end.
     // Not sure if this is worth it or not?
-    for (int i = 0; i < pp->num_slots; i++) {
-        uint8_t* start = start_addr + i * pp->slot_size;
-        // std::unique_ptr<MCWriter> writer(createMCWriter(start, pp->slot_size * (pp->num_slots - i), 0));
+    for (int i = 0; i < ic->num_slots; i++) {
+        uint8_t* start = start_addr + i * ic->slot_size;
+        // std::unique_ptr<MCWriter> writer(createMCWriter(start, ic->slot_size * (ic->num_slots - i), 0));
         // writer->emitNop();
        // writer->emitGuardFalse();
-        std::unique_ptr<Assembler> writer(new Assembler(start, pp->slot_size));
+        std::unique_ptr<Assembler> writer(new Assembler(start, ic->slot_size));
         writer->nop();
         // writer->trap();
-        writer->jmp(JumpDestination::fromStart(pp->slot_size * (pp->num_slots - i)));
+        // writer->jmp(JumpDestination::fromStart(ic->slot_size * (ic->num_slots - i)));
+        writer->jmp(JumpDestination::fromStart(slowpath_start_addr - start));
     }
 
-    ICInfo* ic = new ICInfo(start_addr, slowpath_addr, stack_info, pp->num_slots, pp->slot_size,
-                            pp->getCallingConvention(), live_outs, return_register, pp->type_recorder);
-    ics_by_return_addr[rtn_addr] = ic;
+    ICInfo* icinfo = new ICInfo(start_addr, continue_addr, stack_info, ic->num_slots, ic->slot_size,
+                                ic->getCallingConvention(), live_outs, return_register, ic->type_recorder);
+    ics_by_return_addr[slowpath_rtn_addr] = icinfo;
 
     assert(cf);
-    cf->ics.push_back(ic);
+    cf->ics.push_back(icinfo);
 }
 
 ICInfo* getICInfo(void* rtn_addr) {
...
@@ -129,8 +129,9 @@ public:
 class ICSetupInfo;
 class CompiledFunction;
-void registerCompiledPatchpoint(CompiledFunction* cf, uint8_t* start_addr, const ICSetupInfo*, StackInfo stack_info,
-                                std::unordered_set<int> live_outs);
+void registerCompiledPatchpoint(CompiledFunction* cf, uint8_t* start_addr, uint8_t* slowpath_start_addr,
+                                uint8_t* continue_addr, uint8_t* slowpath_rtn_addr, const ICSetupInfo*,
+                                StackInfo stack_info, std::unordered_set<int> live_outs);
 
 ICInfo* getICInfo(void* rtn_addr);
 }
...
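
The three asserts at the top of the new registerCompiledPatchpoint (see the .cpp hunk above) pin down the layout these extra parameters describe. Restated as a standalone sketch, using the ICSetupInfo fields the way that code does:

    // Sketch: the address-ordering contract behind the new parameters.
    void checkPatchpointLayout(uint8_t* start_addr, uint8_t* slowpath_start_addr,
                               uint8_t* slowpath_rtn_addr, const ICSetupInfo* ic) {
        // The IC slots come first...
        assert(slowpath_start_addr - start_addr >= ic->num_slots * ic->slot_size);
        // ...then the slowpath call, whose return address stays inside the region:
        assert(slowpath_rtn_addr > slowpath_start_addr);
        assert(slowpath_rtn_addr <= start_addr + ic->totalSize());
    }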
@@ -791,63 +791,4 @@ public:
     }
 };
 #endif
 
-void initializePatchpoint(uint8_t* addr, int size) {
-#define CALL_SIZE 13
-#ifndef NDEBUG
-    assert(size >= CALL_SIZE);
-
-    // if (VERBOSITY()) printf("initializing patchpoint at %p - %p\n", addr, addr + size);
-    // for (int i = 0; i < size; i++) {
-    //     printf("%02x ", *(addr + i));
-    //}
-    // printf("\n");
-
-    // Check the exact form of the patchpoint call.
-    // It's important to make sure that the only live registers
-    // are the ones that are used as arguments; ie it wouldn't
-    // matter if the call happened on %r10 instead of %r11,
-    // but it would matter if there wasn't a mov immediately before
-    // the call, since then %r11 would be live and we couldn't
-    // use it as a temporary.
-
-    // mov $imm, %r11:
-    ASSERT(addr[0] == 0x49, "%x", addr[0]);
-    assert(addr[1] == 0xbb);
-    // 8 bytes of the addr
-    // callq *%r11:
-    assert(addr[10] == 0x41);
-    assert(addr[11] == 0xff);
-    assert(addr[12] == 0xd3);
-
-    int i = CALL_SIZE;
-    while (*(addr + i) == 0x66 || *(addr + i) == 0x0f || *(addr + i) == 0x2e)
-        i++;
-    assert(*(addr + i) == 0x90 || *(addr + i) == 0x1f);
-#endif
-
-    memcpy(addr + size - CALL_SIZE, addr, CALL_SIZE);
-    memset(addr, 0x90, size - CALL_SIZE);
-
-    // addr[0] = 0xcc;
-    //// Move the call to the end of the region:
-    // char scratch[CALL_SIZE];
-    // memcpy(scratch, addr, CALL_SIZE);
-    // std::memmove(addr, addr + CALL_SIZE, size - CALL_SIZE);
-    // memcpy(addr + size - CALL_SIZE, scratch, CALL_SIZE);
-}
-
-/*
-MCWriter* createMCWriter(uint8_t* addr, int size, int num_temp_regs) {
-    assert(num_temp_regs >= 0);
-
-    // The X86MCWriter will automatically use %r10 and %r11, so don't need
-    // to pass that along. But if the client requested more than two
-    // temporaries, err out.
-    assert(num_temp_regs <= 2 && "unsupported");
-
-    return new X86MCWriter(addr, size);
-}
-*/
 }
@@ -51,9 +51,6 @@ public:
     virtual void emitCmp(AST_TYPE::AST_TYPE cmp_type, int lhs_argnum, int rhs_argnum, int dest_argnum) = 0;
     virtual void emitToBool(int argnum, int dest_argnum) = 0;
 };
 
-void initializePatchpoint(uint8_t* addr, int size);
-
-MCWriter* createMCWriter(uint8_t* addr, int size, int num_temp_regs);
 }
 #endif
@@ -30,7 +30,11 @@ static const assembler::Register allocatable_regs[] = {
     assembler::RDI, assembler::RSI, assembler::R8, assembler::R9, assembler::R10, assembler::R11,
 
     // For now, cannot allocate callee-save registers since we do not restore them properly
-    // at potentially-unwinding callsites.
+    // at potentially-throwing callsites.
+    // Also, if we wanted to allow spilling of existing values in callee-save registers (which
+    // adding them to this list would by default enable), we would need to somehow tell our frame
+    // introspection code where we spilled them to.
+    //
     // TODO fix that behavior, or create an unwinder that knows how to unwind through our
     // inline caches.
     /*
@@ -1104,4 +1108,84 @@ RewriterVarUsage RewriterVarUsage::addUse() {
 #ifndef NDEBUG
 int RewriterVar::nvars = 0;
 #endif
 
+static const int INITIAL_CALL_SIZE = 13;
+static const int DWARF_RBP_REGNUM = 6;
+
+void* extractSlowpathFunc(uint8_t* pp_addr) {
+#ifndef NDEBUG
+    // mov $imm, %r11:
+    ASSERT(pp_addr[0] == 0x49, "%x", pp_addr[0]);
+    assert(pp_addr[1] == 0xbb);
+    // 8 bytes of the addr
+    // callq *%r11:
+    assert(pp_addr[10] == 0x41);
+    assert(pp_addr[11] == 0xff);
+    assert(pp_addr[12] == 0xd3);
+
+    int i = INITIAL_CALL_SIZE;
+    while (*(pp_addr + i) == 0x66 || *(pp_addr + i) == 0x0f || *(pp_addr + i) == 0x2e)
+        i++;
+    assert(*(pp_addr + i) == 0x90 || *(pp_addr + i) == 0x1f);
+#endif
+
+    void* call_addr = *(void**)&pp_addr[2];
+    return call_addr;
+}
+
+std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
+                                                    int scratch_offset, int scratch_size,
+                                                    const std::unordered_set<int>& live_outs) {
+    assert(start_addr < end_addr);
+
+    int est_slowpath_size = INITIAL_CALL_SIZE;
+
+    std::vector<assembler::GenericRegister> regs_to_spill;
+
+    for (int dwarf_regnum : live_outs) {
+        assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(dwarf_regnum);
+
+        assert(!(ru.type == assembler::GenericRegister::GP && ru.gp == assembler::R11) && "We assume R11 is free!");
+
+        if (ru.type == assembler::GenericRegister::GP) {
+            if (ru.gp == assembler::RSP || ru.gp.isCalleeSave())
+                continue;
+        }
+
+        // Location(ru).dump();
+
+        regs_to_spill.push_back(ru);
+
+        if (ru.type == assembler::GenericRegister::GP)
+            est_slowpath_size += 14; // 7 bytes for a mov with 4-byte displacement, needed twice
+        else if (ru.type == assembler::GenericRegister::XMM)
+            est_slowpath_size += 18; // (up to) 9 bytes for a movsd with 4-byte displacement, needed twice
+        else
+            abort();
+    }
+
+    if (VERBOSITY())
+        printf("Have to spill %ld regs around the slowpath\n", regs_to_spill.size());
+
+    // TODO: some of these registers could already have been pushed via the frame saving code
+
+    uint8_t* slowpath_start = end_addr - est_slowpath_size;
+    ASSERT(slowpath_start >= start_addr, "Used more slowpath space than expected; change ICSetupInfo::totalSize()?");
+
+    assembler::Assembler _a(start_addr, slowpath_start - start_addr);
+    //_a.trap();
+    _a.fillWithNops();
+
+    assembler::Assembler assem(slowpath_start, end_addr - slowpath_start);
+    // if (regs_to_spill.size())
+    //     assem.trap();
+    assem.emitBatchPush(scratch_offset, scratch_size, regs_to_spill);
+    uint8_t* rtn = assem.emitCall(slowpath_func, assembler::R11);
+    assem.emitBatchPop(scratch_offset, scratch_size, regs_to_spill);
+    assem.fillWithNops();
+    assert(!assem.hasFailed());
+
+    return std::make_pair(slowpath_start, rtn);
+}
 }
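
A worked example of the est_slowpath_size accounting above, for a hypothetical live-out set of three GP registers plus one XMM register:

    int est_slowpath_size = 13      // mov $imm64, %r11; callq *%r11
                          + 3 * 14  // per GP reg: 7-byte mov with 4-byte displacement, spill + restore
                          + 1 * 18; // per XMM reg: (up to) 9-byte movsd, spill + restore
    // est_slowpath_size == 73: that many bytes are carved off the end of the
    // patchpoint region for the slowpath; everything before it becomes nops.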
@@ -312,6 +312,13 @@ public:
     friend class RewriterVar;
     friend class RewriterVarUsage;
 };
 
+void* extractSlowpathFunc(uint8_t* pp_addr);
+
+// returns (start_of_slowpath, return_addr_of_slowpath_call)
+std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
+                                                    int scratch_offset, int scratch_size,
+                                                    const std::unordered_set<int>& live_outs);
+
 }
 #endif
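
A sketch of a call site that follows the comment above; the real one appears in the processStackmap hunk below, with the same variable names:

    auto p = initializePatchpoint3(slowpath_func, start_addr, end_addr,
                                   scratch_rbp_offset, scratch_size, live_outs);
    uint8_t* slowpath_start = p.first;     // start of the spill/call/restore sequence
    uint8_t* slowpath_rtn_addr = p.second; // return address of the slowpath call,
                                           // later used as the ICInfo lookup key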
@@ -20,7 +20,6 @@ namespace pyston {
 struct StackInfo {
     int stack_size;
-    bool has_scratch;
     int scratch_bytes;
     int scratch_rbp_offset;
 };
@@ -80,6 +79,8 @@ struct XMMRegister {
     bool operator==(const XMMRegister& rhs) const { return regnum == rhs.regnum; }
     bool operator!=(const XMMRegister& rhs) const { return !(*this == rhs); }
 
+    void dump() const { printf("XMM%d\n", regnum); }
 };
 
 const XMMRegister XMM0(0);
@@ -129,6 +130,15 @@ struct GenericRegister {
     constexpr GenericRegister(const Register r) : gp(r), type(GP) {}
     constexpr GenericRegister(const XMMRegister r) : xmm(r), type(XMM) {}
 
+    void dump() const {
+        if (type == GP)
+            gp.dump();
+        else if (type == XMM)
+            xmm.dump();
+        else
+            abort();
+    }
+
     static GenericRegister fromDwarf(int dwarf_regnum);
 };
...
@@ -32,7 +32,7 @@ int ICSetupInfo::totalSize() const {
     int call_size = CALL_ONLY_SIZE;
     if (getCallingConvention() != llvm::CallingConv::C) {
         // 14 bytes per reg that needs to be spilled
-        call_size += 14 * 4;
+        call_size += 14 * 6;
     }
     return num_slots * slot_size + call_size;
 }
@@ -120,15 +120,49 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
         uint8_t* start_addr = (uint8_t*)pp->parentFunction()->code + r->offset;
         uint8_t* end_addr = start_addr + pp->patchpointSize();
 
+        // TODO shouldn't have to do it this way
+        void* slowpath_func = extractSlowpathFunc(start_addr);
+
+        //*start_addr = 0xcc;
+        // start_addr++;
+
         const ICSetupInfo* ic = pp->getICInfo();
-        if (ic == NULL)
+        if (ic == NULL) {
+            // We have to be using the C calling convention here, so we don't need to check the live outs
+            // or save them across the call.
+            initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size,
+                                  std::unordered_set<int>());
             continue;
+        }
 
         std::unordered_set<int> live_outs(extractLiveOuts(r, ic->getCallingConvention()));
 
-        registerCompiledPatchpoint(cf, start_addr, ic,
-                                   StackInfo({ stack_size, true, scratch_size, scratch_rbp_offset }),
-                                   std::move(live_outs));
+        if (ic->hasReturnValue()) {
+            assert(ic->getCallingConvention() == llvm::CallingConv::C
+                   || ic->getCallingConvention() == llvm::CallingConv::PreserveAll);
+
+            static const int DWARF_RAX = 0;
+            // It's possible that the return value doesn't get used, in which case
+            // we can avoid copying back into RAX at the end
+            if (live_outs.count(DWARF_RAX)) {
+                live_outs.erase(DWARF_RAX);
+            }
+        }
+
+        auto _p
+            = initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size, live_outs);
+        uint8_t* slowpath_start = _p.first;
+        uint8_t* slowpath_rtn_addr = _p.second;
+
+        ASSERT(slowpath_start - start_addr >= ic->num_slots * ic->slot_size,
+               "Used more slowpath space than expected; change ICSetupInfo::totalSize()?");
+
+        assert(pp->numICStackmapArgs() == 0); // don't do anything with these for now
+
+        registerCompiledPatchpoint(cf, start_addr, slowpath_start, end_addr, slowpath_rtn_addr, ic,
+                                   StackInfo({ stack_size, scratch_size, scratch_rbp_offset }), std::move(live_outs));
     }
 
     for (PatchpointInfo* pp : new_patchpoints) {
...