Commit 7b941863 authored by Marius Wachtler

Merge pull request #1122 from undingen/bjit_r12_r13

bjit: micro-optimization: use r13 instead of r12 for the interpreter pointer
parents 45788284 b77872ef
...@@ -39,17 +39,18 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations; ...@@ -39,17 +39,18 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations;
// long foo(char* c); // long foo(char* c);
// void bjit() { // void bjit() {
// asm volatile ("" ::: "r14"); // asm volatile ("" ::: "r14");
// asm volatile ("" ::: "r12"); // asm volatile ("" ::: "r13");
// char scratch[256+16]; // char scratch[256+16];
// foo(scratch); // foo(scratch);
// } // }
// //
// It omits the frame pointer but saves R12 and R14 // It omits the frame pointer but saves r13 and r14
// use 'objdump -s -j .eh_frame <obj.file>' to dump it
const unsigned char eh_info[] const unsigned char eh_info[]
= { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10, = { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10,
0x01, 0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x01, 0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42,
0x0e, 0x18, 0x47, 0x0e, 0xb0, 0x02, 0x8c, 0x03, 0x8e, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 }; 0x0e, 0x18, 0x47, 0x0e, 0xb0, 0x02, 0x8d, 0x03, 0x8e, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 };
static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!"); static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!");
static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!"); static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!");
...@@ -70,10 +71,10 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name) ...@@ -70,10 +71,10 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
// emit prolog // emit prolog
a.push(assembler::R14); a.push(assembler::R14);
a.push(assembler::R12); a.push(assembler::R13);
static_assert(sp_adjustment % 16 == 8, "stack isn't aligned"); static_assert(sp_adjustment % 16 == 8, "stack isn't aligned");
a.sub(assembler::Immediate(sp_adjustment), assembler::RSP); a.sub(assembler::Immediate(sp_adjustment), assembler::RSP);
a.mov(assembler::RDI, assembler::R12); // interpreter pointer a.mov(assembler::RDI, assembler::R13); // interpreter pointer
a.mov(assembler::RDX, assembler::R14); // vreg array a.mov(assembler::RDX, assembler::R14); // vreg array
a.jmp(assembler::Indirect(assembler::RSI, offsetof(CFGBlock, code))); // jump to block a.jmp(assembler::Indirect(assembler::RSI, offsetof(CFGBlock, code))); // jump to block
...@@ -145,7 +146,7 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic ...@@ -145,7 +146,7 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic
interp(0), interp(0),
ic_info(std::move(ic_info)) { ic_info(std::move(ic_info)) {
interp = createNewVar(); interp = createNewVar();
addLocationToVar(interp, assembler::R12); addLocationToVar(interp, assembler::R13);
interp->setAttr(ASTInterpreterJitInterface::getCurrentBlockOffset(), imm(block)); interp->setAttr(ASTInterpreterJitInterface::getCurrentBlockOffset(), imm(block));
vregs_array = createNewVar(); vregs_array = createNewVar();
...@@ -806,7 +807,7 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& siz ...@@ -806,7 +807,7 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& siz
int num_bytes = assembler->bytesWritten(); int num_bytes = assembler->bytesWritten();
block_next->getInReg(assembler::RAX, true); block_next->getInReg(assembler::RAX, true);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12); assembler->pop(assembler::R13);
assembler->pop(assembler::R14); assembler->pop(assembler::R14);
assembler->retq(); assembler->retq();
...@@ -830,7 +831,7 @@ void JitFragmentWriter::_emitOSRPoint() { ...@@ -830,7 +831,7 @@ void JitFragmentWriter::_emitOSRPoint() {
// this generates code for: // this generates code for:
// if (++interpreter.edgecount < OSR_THRESHOLD_BASELINE) // if (++interpreter.edgecount < OSR_THRESHOLD_BASELINE)
// return std::make_pair((CFGBlock*)0, ASTInterpreterJitInterface::osr_dummy_value); // return std::make_pair((CFGBlock*)0, ASTInterpreterJitInterface::osr_dummy_value);
assembler::Register interp_reg = getInterp()->getInReg(); // will always be R12 assembler::Register interp_reg = getInterp()->getInReg(); // will always be r13
assembler::Indirect edgecount = assembler::Indirect(interp_reg, ASTInterpreterJitInterface::getEdgeCountOffset()); assembler::Indirect edgecount = assembler::Indirect(interp_reg, ASTInterpreterJitInterface::getEdgeCountOffset());
assembler->incl(edgecount); // 32bit inc assembler->incl(edgecount); // 32bit inc
assembler->cmpl(edgecount, assembler::Immediate(OSR_THRESHOLD_BASELINE)); // 32bit cmp assembler->cmpl(edgecount, assembler::Immediate(OSR_THRESHOLD_BASELINE)); // 32bit cmp
...@@ -839,7 +840,7 @@ void JitFragmentWriter::_emitOSRPoint() { ...@@ -839,7 +840,7 @@ void JitFragmentWriter::_emitOSRPoint() {
assembler->clear_reg(assembler::RAX); // = next block to execute assembler->clear_reg(assembler::RAX); // = next block to execute
assembler->mov(assembler::Immediate(ASTInterpreterJitInterface::osr_dummy_value), assembler::RDX); assembler->mov(assembler::Immediate(ASTInterpreterJitInterface::osr_dummy_value), assembler::RDX);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12); assembler->pop(assembler::R13);
assembler->pop(assembler::R14); assembler->pop(assembler::R14);
assembler->retq(); assembler->retq();
} }
...@@ -933,7 +934,7 @@ void JitFragmentWriter::_emitReturn(RewriterVar* return_val) { ...@@ -933,7 +934,7 @@ void JitFragmentWriter::_emitReturn(RewriterVar* return_val) {
return_val->getInReg(assembler::RDX, true); return_val->getInReg(assembler::RDX, true);
assembler->clear_reg(assembler::RAX); assembler->clear_reg(assembler::RAX);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12); assembler->pop(assembler::R13);
assembler->pop(assembler::R14); assembler->pop(assembler::R14);
assembler->retq(); assembler->retq();
return_val->bumpUse(); return_val->bumpUse();
......
...@@ -70,7 +70,7 @@ class JitFragmentWriter; ...@@ -70,7 +70,7 @@ class JitFragmentWriter;
// register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance. // register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance.
// //
// We use the following callee-save regs to speed up the generated code: // We use the following callee-save regs to speed up the generated code:
// r12: pointer to ASTInterpreter instance // r13: pointer to ASTInterpreter instance
// r14: pointer to the vregs array // r14: pointer to the vregs array
// //
// To execute a specific CFGBlock one has to call: // To execute a specific CFGBlock one has to call:
...@@ -91,10 +91,10 @@ class JitFragmentWriter; ...@@ -91,10 +91,10 @@ class JitFragmentWriter;
// Basic layout of generated code block is: // Basic layout of generated code block is:
// entry_code: // entry_code:
// push %r14 ; save r14 // push %r14 ; save r14
// push %r12 ; save r12 // push %r13 ; save r13
// sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size (0x100) + 16 bytes of space for two func args passed on the // sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size (0x100) + 16 bytes of space for two func args passed on the
// stack + 8 bytes for stack alignment // stack + 8 bytes for stack alignment
// mov %rdi,%r12 ; copy the pointer to ASTInterpreter instance into r12 // mov %rdi,%r13 ; copy the pointer to ASTInterpreter instance into r13
// mov %rdx,%r14 ; copy the pointer to the vregs array into r14 // mov %rdx,%r14 ; copy the pointer to the vregs array into r14
// jmpq *0x8(%rsi) ; jump to block->code // jmpq *0x8(%rsi) ; jump to block->code
// possible values: first_JitFragment, second_JitFragment,... // possible values: first_JitFragment, second_JitFragment,...
...@@ -107,7 +107,7 @@ class JitFragmentWriter; ...@@ -107,7 +107,7 @@ class JitFragmentWriter;
// jne end_side_exit // jne end_side_exit
// movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg) // movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg)
// add $0x118,%rsp ; restore stack pointer // add $0x118,%rsp ; restore stack pointer
// pop %r12 ; restore r12 // pop %r13 ; restore r13
// pop %r14 ; restore r14 // pop %r14 ; restore r14
// ret ; exit to the interpreter which will interpret the specified CFGBlock* // ret ; exit to the interpreter which will interpret the specified CFGBlock*
// end_side_exit: // end_side_exit:
...@@ -120,7 +120,7 @@ class JitFragmentWriter; ...@@ -120,7 +120,7 @@ class JitFragmentWriter;
// in this case 0 which means we are finished // in this case 0 which means we are finished
// movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return // movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return
// add $0x118,%rsp ; restore stack pointer // add $0x118,%rsp ; restore stack pointer
// pop %r12 ; restore r12 // pop %r13 ; restore r13
// pop %r14 ; restore r14 // pop %r14 ; restore r14
// ret // ret
// //
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment