Commit 0fc53b70 authored by Kevin Modzelewski

Merge pull request #857 from kmod/perf5

Reduce some codegen allocations
parents d3fb236b ceed3536
......@@ -183,14 +183,14 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
}
ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
int slot_size, llvm::CallingConv::ID calling_conv, const std::unordered_set<int>& live_outs,
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs,
assembler::GenericRegister return_register, TypeRecorder* type_recorder)
: next_slot_to_try(0),
stack_info(stack_info),
num_slots(num_slots),
slot_size(slot_size),
calling_conv(calling_conv),
live_outs(live_outs.begin(), live_outs.end()),
live_outs(std::move(_live_outs)),
return_register(return_register),
type_recorder(type_recorder),
retry_in(0),
......@@ -200,15 +200,14 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S
slowpath_rtn_addr(slowpath_rtn_addr),
continue_addr(continue_addr) {
for (int i = 0; i < num_slots; i++) {
slots.push_back(ICSlotInfo(this, i));
slots.emplace_back(this, i);
}
}
static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr;
std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* slowpath_start_addr,
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
const ICSetupInfo* ic, StackInfo stack_info,
std::unordered_set<int> live_outs) {
const ICSetupInfo* ic, StackInfo stack_info, LiveOutSet live_outs) {
assert(slowpath_start_addr - start_addr >= ic->num_slots * ic->slot_size);
assert(slowpath_rtn_addr > slowpath_start_addr);
assert(slowpath_rtn_addr <= start_addr + ic->totalSize());
......@@ -221,9 +220,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
static const int DWARF_RAX = 0;
// It's possible that the return value doesn't get used, in which case
// we can avoid copying back into RAX at the end
if (live_outs.count(DWARF_RAX)) {
live_outs.erase(DWARF_RAX);
}
live_outs.clear(DWARF_RAX);
// TODO we only need to do this if 0 was in live_outs, since if it wasn't, that indicates
// the return value won't be used and we can optimize based on that.
......@@ -247,7 +244,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
}
ICInfo* icinfo = new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->num_slots, ic->slot_size,
ic->getCallingConvention(), live_outs, return_register, ic->type_recorder);
ic->getCallingConvention(), std::move(live_outs), return_register, ic->type_recorder);
ics_by_return_addr[slowpath_rtn_addr] = icinfo;
......
......@@ -23,6 +23,7 @@
#include "asm_writing/assembler.h"
#include "asm_writing/types.h"
#include "core/util.h"
namespace pyston {
......@@ -91,6 +92,8 @@ public:
friend class ICInfo;
};
typedef BitSet<16> LiveOutSet;
class ICInfo {
private:
std::vector<ICSlotInfo> slots;
......@@ -105,7 +108,7 @@ private:
const int num_slots;
const int slot_size;
const llvm::CallingConv::ID calling_conv;
const std::vector<int> live_outs;
LiveOutSet live_outs;
const assembler::GenericRegister return_register;
TypeRecorder* const type_recorder;
int retry_in, retry_backoff;
......@@ -116,14 +119,14 @@ private:
public:
ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
int slot_size, llvm::CallingConv::ID calling_conv, const std::unordered_set<int>& live_outs,
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet live_outs,
assembler::GenericRegister return_register, TypeRecorder* type_recorder);
void* const start_addr, *const slowpath_rtn_addr, *const continue_addr;
int getSlotSize() { return slot_size; }
int getNumSlots() { return num_slots; }
llvm::CallingConv::ID getCallingConvention() { return calling_conv; }
const std::vector<int>& getLiveOuts() { return live_outs; }
const LiveOutSet& getLiveOuts() { return live_outs; }
std::unique_ptr<ICSlotRewrite> startRewrite(const char* debug_name);
void clear(ICSlotInfo* entry);
......@@ -138,8 +141,7 @@ class ICSetupInfo;
struct CompiledFunction;
std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* slowpath_start_addr,
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
const ICSetupInfo*, StackInfo stack_info,
std::unordered_set<int> live_outs);
const ICSetupInfo*, StackInfo stack_info, LiveOutSet live_outs);
void deregisterCompiledPatchpoint(ICInfo* ic);
ICInfo* getICInfo(void* rtn_addr);
......
......@@ -1804,7 +1804,7 @@ TypeRecorder* Rewriter::getTypeRecorder() {
return rewrite->getTypeRecorder();
}
Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs)
Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const LiveOutSet& live_outs)
: rewrite(std::move(rewrite)),
assembler(this->rewrite->getAssembler()),
picked_slot(NULL),
......@@ -1855,6 +1855,11 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const s
var->locations.push_back(l);
}
// Make sure there weren't duplicates in the live_outs list.
// Probably not a big deal if there were, but we shouldn't be generating those.
assert(std::find(this->live_out_regs.begin(), this->live_out_regs.end(), dwarf_regnum)
== this->live_out_regs.end());
this->live_outs.push_back(var);
this->live_out_regs.push_back(dwarf_regnum);
}
......@@ -2052,8 +2057,8 @@ void setSlowpathFunc(uint8_t* pp_addr, void* func) {
}
PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs, SpillMap& remapped) {
int scratch_offset, int scratch_size, LiveOutSet live_outs,
SpillMap& remapped) {
assert(start_addr < end_addr);
int est_slowpath_size = INITIAL_CALL_SIZE;
......@@ -2061,8 +2066,6 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
std::vector<assembler::GenericRegister> regs_to_spill;
std::vector<assembler::Register> regs_to_reload;
std::unordered_set<int> live_outs_for_slot;
for (int dwarf_regnum : live_outs) {
assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(dwarf_regnum);
......@@ -2070,7 +2073,7 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
if (ru.type == assembler::GenericRegister::GP) {
if (ru.gp == assembler::RSP || ru.gp.isCalleeSave()) {
live_outs_for_slot.insert(dwarf_regnum);
live_outs.set(dwarf_regnum);
continue;
}
}
......@@ -2086,7 +2089,7 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
continue;
}
live_outs_for_slot.insert(dwarf_regnum);
live_outs.set(dwarf_regnum);
regs_to_spill.push_back(ru);
......@@ -2143,8 +2146,7 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
assem.fillWithNops();
assert(!assem.hasFailed());
return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr,
std::move(live_outs_for_slot));
return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr, std::move(live_outs));
}
void* Rewriter::RegionAllocator::alloc(size_t bytes) {
......
......@@ -417,7 +417,7 @@ protected:
llvm::SmallVector<RewriterVar*, 8> args;
llvm::SmallVector<RewriterVar*, 8> live_outs;
Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs);
Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const LiveOutSet& live_outs);
std::deque<RewriterAction, RegionAllocatorAdaptor<RewriterAction>> actions;
template <typename F> void addAction(F&& action, llvm::ArrayRef<RewriterVar*> vars, ActionType type) {
......@@ -616,10 +616,10 @@ struct PatchpointInitializationInfo {
uint8_t* slowpath_start;
uint8_t* slowpath_rtn_addr;
uint8_t* continue_addr;
std::unordered_set<int> live_outs;
LiveOutSet live_outs;
PatchpointInitializationInfo(uint8_t* slowpath_start, uint8_t* slowpath_rtn_addr, uint8_t* continue_addr,
std::unordered_set<int>&& live_outs)
LiveOutSet live_outs)
: slowpath_start(slowpath_start),
slowpath_rtn_addr(slowpath_rtn_addr),
continue_addr(continue_addr),
......@@ -627,8 +627,8 @@ struct PatchpointInitializationInfo {
};
PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs, SpillMap& remapped);
int scratch_offset, int scratch_size, LiveOutSet live_outs,
SpillMap& remapped);
template <> inline RewriterVar* RewriterVar::getAttrCast<bool, bool>(int offset, Location loc) {
return getAttr(offset, loc, assembler::MovType::ZBL);
......
......@@ -102,7 +102,7 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in
int scratch_offset = num_stack_args * 8;
StackInfo stack_info(scratch_size, scratch_offset);
std::unordered_set<int> live_outs;
LiveOutSet live_outs;
void* fragment_start = a.curInstPointer() - patch_jump_offset;
long fragment_offset = a.bytesWritten() - patch_jump_offset;
......@@ -612,13 +612,13 @@ int JitFragmentWriter::finishCompilation() {
uint8_t* end_addr = pp_info.end_addr;
PatchpointInitializationInfo initialization_info
= initializePatchpoint3(pp_info.func_addr, start_addr, end_addr, 0 /* scratch_offset */,
0 /* scratch_size */, std::unordered_set<int>(), _spill_map);
0 /* scratch_size */, LiveOutSet(), _spill_map);
uint8_t* slowpath_start = initialization_info.slowpath_start;
uint8_t* slowpath_rtn_addr = initialization_info.slowpath_rtn_addr;
std::unique_ptr<ICInfo> pp = registerCompiledPatchpoint(
start_addr, slowpath_start, initialization_info.continue_addr, slowpath_rtn_addr, pp_info.ic.get(),
pp_info.stack_info, std::unordered_set<int>());
std::unique_ptr<ICInfo> pp
= registerCompiledPatchpoint(start_addr, slowpath_start, initialization_info.continue_addr,
slowpath_rtn_addr, pp_info.ic.get(), pp_info.stack_info, LiveOutSet());
pp.release();
}
......
......@@ -247,7 +247,7 @@ ScopeInfo* IRGenState::getScopeInfoForNode(AST* node) {
class IREmitterImpl : public IREmitter {
private:
IRGenState* irstate;
IRBuilder* builder;
std::unique_ptr<IRBuilder> builder;
llvm::BasicBlock*& curblock;
IRGenerator* irgenerator;
......@@ -348,6 +348,9 @@ private:
public:
explicit IREmitterImpl(IRGenState* irstate, llvm::BasicBlock*& curblock, IRGenerator* irgenerator)
: irstate(irstate), builder(new IRBuilder(g.context)), curblock(curblock), irgenerator(irgenerator) {
// Perf note: it seems to be more efficient to separately allocate the "builder" member,
// even though we could allocate it in-line; maybe it's infrequently used enough that it's better
// to not have it take up cache space.
RELEASE_ASSERT(irstate->getSourceInfo()->scoping->areGlobalsFromModule(),
"jit doesn't support custom globals yet");
......@@ -356,7 +359,7 @@ public:
builder->SetInsertPoint(curblock);
}
IRBuilder* getBuilder() override { return builder; }
IRBuilder* getBuilder() override { return &*builder; }
GCBuilder* getGC() override { return irstate->getGC(); }
......@@ -557,8 +560,6 @@ public:
types(types),
state(RUNNING) {}
~IRGeneratorImpl() { delete emitter.getBuilder(); }
private:
OpInfo getOpInfoForNode(AST* ast, const UnwindInfo& unw_info) {
assert(ast);
......
......@@ -29,7 +29,7 @@
namespace pyston {
void PatchpointInfo::addFrameVar(const std::string& name, CompilerType* type) {
void PatchpointInfo::addFrameVar(llvm::StringRef name, CompilerType* type) {
frame_vars.push_back(FrameVarInfo({.name = name, .type = type }));
}
......@@ -91,7 +91,7 @@ void PatchpointInfo::parseLocationMap(StackMap::Record* r, LocationMap* map) {
int num_args = frame_var.type->numFrameArgs();
llvm::SmallVector<StackMap::Record::Location, 1> locations;
locations.append(&r->locations[cur_arg], &r->locations[cur_arg + num_args]);
locations.append(r->locations.data() + cur_arg, r->locations.data() + cur_arg + num_args);
// printf("%s %d %d\n", frame_var.name.c_str(), r->locations[cur_arg].type, r->locations[cur_arg].regnum);
......@@ -117,14 +117,14 @@ static int extractScratchOffset(PatchpointInfo* pp, StackMap::Record* r) {
return l.offset;
}
static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc) {
std::unordered_set<int> live_outs;
static LiveOutSet extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc) {
LiveOutSet live_outs;
// Using the C calling convention, there shouldn't be any non-callee-save registers in here,
// but LLVM is conservative and will add some. So with the C cc, ignore the specified live outs.
if (cc != llvm::CallingConv::C) {
for (const auto& live_out : r->live_outs) {
live_outs.insert(live_out.regnum);
live_outs.set(live_out.regnum);
}
}
......@@ -133,11 +133,11 @@ static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::Callin
// sense to track them as live_outs.
// Unfortunately this means we need to be conservative about it unless
// we can change llvm's behavior.
live_outs.insert(3);
live_outs.insert(12);
live_outs.insert(13);
live_outs.insert(14);
live_outs.insert(15);
live_outs.set(3);
live_outs.set(12);
live_outs.set(13);
live_outs.set(14);
live_outs.set(15);
return live_outs;
}
......@@ -151,7 +151,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
cf->location_map->constants = stackmap->constants;
for (int i = 0; i < nrecords; i++) {
StackMap::Record* r = stackmap->records[i];
StackMap::Record* r = &stackmap->records[i];
assert(stackmap->stack_size_records.size() == 1);
const StackMap::StackSizeRecord& stack_size_record = stackmap->stack_size_records[0];
......@@ -212,12 +212,12 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
if (ic == NULL) {
// We have to be using the C calling convention here, so we don't need to check the live outs
// or save them across the call.
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size,
std::unordered_set<int>(), frame_remapped);
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size, LiveOutSet(),
frame_remapped);
continue;
}
std::unordered_set<int> live_outs(extractLiveOuts(r, ic->getCallingConvention()));
LiveOutSet live_outs(extractLiveOuts(r, ic->getCallingConvention()));
if (ic->hasReturnValue()) {
assert(ic->getCallingConvention() == llvm::CallingConv::C
......@@ -226,14 +226,12 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
static const int DWARF_RAX = 0;
// It's possible that the return value doesn't get used, in which case
// we can avoid copying back into RAX at the end
if (live_outs.count(DWARF_RAX)) {
live_outs.erase(DWARF_RAX);
}
live_outs.clear(DWARF_RAX);
}
auto initialization_info = initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset,
scratch_size, live_outs, frame_remapped);
scratch_size, std::move(live_outs), frame_remapped);
ASSERT(initialization_info.slowpath_start - start_addr >= ic->num_slots * ic->slot_size,
"Used more slowpath space than expected; change ICSetupInfo::totalSize()?");
......
......@@ -42,7 +42,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap);
struct PatchpointInfo {
public:
struct FrameVarInfo {
std::string name;
llvm::StringRef name;
CompilerType* type;
};
......@@ -74,7 +74,7 @@ public:
int scratchStackmapArg() { return 0; }
int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); }
void addFrameVar(const std::string& name, CompilerType* type);
void addFrameVar(llvm::StringRef name, CompilerType* type);
void setNumFrameArgs(int num_frame_args) {
assert(num_frame_stackmap_args == -1);
num_frame_stackmap_args = num_frame_args;
......
......@@ -70,6 +70,7 @@ StackMap* parseStackMap() {
if (VERBOSITY() >= 3)
printf("%d functions\n", nfunctions);
cur_map->stack_size_records.reserve(nfunctions);
for (int i = 0; i < nfunctions; i++) {
const StackMap::StackSizeRecord& size_record = *ptr.size_record++;
cur_map->stack_size_records.push_back(size_record);
......@@ -79,6 +80,7 @@ StackMap* parseStackMap() {
if (VERBOSITY() >= 3)
printf("%d constants\n", nconstants);
cur_map->constants.reserve(nconstants);
for (int i = 0; i < nconstants; i++) {
uint64_t constant = *ptr.u64++;
......@@ -89,16 +91,18 @@ StackMap* parseStackMap() {
if (VERBOSITY() >= 3)
printf("%d records\n", nrecords);
cur_map->records.reserve(nrecords);
for (int i = 0; i < nrecords; i++) {
StackMap::Record* record = new StackMap::Record();
cur_map->records.push_back(record);
cur_map->records.emplace_back();
StackMap::Record* record = &cur_map->records.back();
record->id = *ptr.u64++;
record->offset = *ptr.u32++;
record->flags = *ptr.u16++; // reserved (record flags)
int numlocations = *ptr.u16++;
record->locations.reserve(numlocations);
if (VERBOSITY() >= 3)
printf("Stackmap record %ld at 0x%x has %d locations:\n", record->id, record->offset, numlocations);
......@@ -125,6 +129,7 @@ StackMap* parseStackMap() {
ptr.u16++; // padding
int num_live_outs = *ptr.u16++;
record->live_outs.reserve(num_live_outs);
for (int i = 0; i < num_live_outs; i++) {
const StackMap::Record::LiveOut& r = *ptr.record_liveout++;
record->live_outs.push_back(r);
......
......@@ -20,6 +20,7 @@
#include <vector>
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
namespace llvm {
class JITEventListener;
......@@ -61,20 +62,20 @@ struct StackMap {
uint64_t id;
uint32_t offset;
uint16_t flags;
std::vector<Location> locations;
std::vector<LiveOut> live_outs;
llvm::SmallVector<Location, 8> locations;
llvm::SmallVector<LiveOut, 8> live_outs;
};
std::vector<StackSizeRecord> stack_size_records;
llvm::SmallVector<StackSizeRecord, 1> stack_size_records;
uint32_t header;
std::vector<uint64_t> constants;
std::vector<Record*> records;
llvm::SmallVector<uint64_t, 8> constants;
std::vector<Record> records;
};
// TODO this belongs somewhere else?
class LocationMap {
public:
std::vector<uint64_t> constants;
llvm::SmallVector<uint64_t, 8> constants;
StackMap::Record::Location frame_info_location;
bool frameInfoFound() { return frame_info_location.type != 0; }
......@@ -88,10 +89,10 @@ public:
CompilerType* type;
llvm::SmallVector<StackMap::Record::Location, 1> locations;
};
std::vector<LocationEntry> locations;
llvm::SmallVector<LocationEntry, 2> locations;
};
std::unordered_map<std::string, LocationTable> names;
llvm::StringMap<LocationTable> names;
};
StackMap* parseStackMap();
......
......@@ -866,7 +866,7 @@ DeoptState getDeoptState() {
std::unordered_set<std::string> is_undefined;
for (const auto& p : cf->location_map->names) {
if (!startswith(p.first, "!is_defined_"))
if (!startswith(p.first(), "!is_defined_"))
continue;
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
......@@ -876,7 +876,7 @@ DeoptState getDeoptState() {
assert(locs.size() == 1);
uint64_t v = frame_iter->readLocation(locs[0]);
if ((v & 1) == 0)
is_undefined.insert(p.first.substr(12));
is_undefined.insert(p.first().substr(12));
break;
}
......@@ -884,10 +884,10 @@ DeoptState getDeoptState() {
}
for (const auto& p : cf->location_map->names) {
if (p.first[0] == '!')
if (p.first()[0] == '!')
continue;
if (is_undefined.count(p.first))
if (is_undefined.count(p.first()))
continue;
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
......@@ -895,16 +895,16 @@ DeoptState getDeoptState() {
const auto& locs = e.locations;
llvm::SmallVector<uint64_t, 1> vals;
// printf("%s: %s\n", p.first.c_str(), e.type->debugName().c_str());
// printf("%s: %s\n", p.first().c_str(), e.type->debugName().c_str());
for (auto& loc : locs) {
vals.push_back(frame_iter->readLocation(loc));
}
Box* v = e.type->deserializeFromFrame(vals);
// printf("%s: (pp id %ld) %p\n", p.first.c_str(), e._debug_pp_id, v);
// printf("%s: (pp id %ld) %p\n", p.first().c_str(), e._debug_pp_id, v);
ASSERT(gc::isValidGCObject(v), "%p", v);
d->d[boxString(p.first)] = v;
d->d[boxString(p.first())] = v;
}
}
}
......@@ -961,7 +961,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
std::unordered_set<std::string> is_undefined;
for (const auto& p : cf->location_map->names) {
if (!startswith(p.first, "!is_defined_"))
if (!startswith(p.first(), "!is_defined_"))
continue;
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
......@@ -971,7 +971,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
assert(locs.size() == 1);
uint64_t v = impl->readLocation(locs[0]);
if ((v & 1) == 0)
is_undefined.insert(p.first.substr(12));
is_undefined.insert(p.first().substr(12));
break;
}
......@@ -979,13 +979,13 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
}
for (const auto& p : cf->location_map->names) {
if (p.first[0] == '!')
if (p.first()[0] == '!')
continue;
if (p.first[0] == '#')
if (p.first()[0] == '#')
continue;
if (is_undefined.count(p.first))
if (is_undefined.count(p.first()))
continue;
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
......@@ -993,7 +993,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
const auto& locs = e.locations;
llvm::SmallVector<uint64_t, 1> vals;
// printf("%s: %s\n", p.first.c_str(), e.type->debugName().c_str());
// printf("%s: %s\n", p.first().c_str(), e.type->debugName().c_str());
// printf("%ld locs\n", locs.size());
for (auto& loc : locs) {
......@@ -1004,9 +1004,9 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
}
Box* v = e.type->deserializeFromFrame(vals);
// printf("%s: (pp id %ld) %p\n", p.first.c_str(), e._debug_pp_id, v);
// printf("%s: (pp id %ld) %p\n", p.first().c_str(), e._debug_pp_id, v);
assert(gc::isValidGCObject(v));
d->d[boxString(p.first)] = v;
d->d[boxString(p.first())] = v;
}
}
}
......
......@@ -192,8 +192,8 @@ private:
LOCK_REGION(&self->lock);
ASSERT(self->map.size() == self->map_elts, "%ld %d", self->map.size(), self->map_elts);
assert(s->my_tid == pthread_self());
ASSERT(self->map.size() == self->map_elts, "%ld %d", self->map.size(), self->map_elts);
assert(self->map.count(pthread_self()));
self->map.erase(pthread_self());
......@@ -202,6 +202,8 @@ private:
#endif
delete s;
ASSERT(self->map.size() == self->map_elts, "%ld %d", self->map.size(), self->map_elts);
}
template <int... S> Storage* make(impl::seq<S...>) {
......@@ -214,6 +216,7 @@ private:
protected:
void onFork() override {
ASSERT(this->map.size() == this->map_elts, "%ld %d", this->map.size(), this->map_elts);
pthread_t surviving_ptid = pthread_self();
for (auto it = this->map.begin(), end = this->map.end(); it != end;) {
if (it->first != surviving_ptid) {
......@@ -241,6 +244,8 @@ public:
for (auto& p : map) {
f(&p.second->val);
}
ASSERT(this->map.size() == this->map_elts, "%ld %d", this->map.size(), this->map_elts);
}
template <typename... Arguments> void forEachValue(std::function<void(T*, Arguments...)> f, Arguments... args) {
......@@ -250,6 +255,8 @@ public:
for (auto& p : map) {
f(&p.second->val, std::forward<Arguments>(args)...);
}
ASSERT(this->map.size() == this->map_elts, "%ld %d", this->map.size(), this->map_elts);
}
T* get() {
......
......@@ -128,6 +128,62 @@ template <class T1, class T2> bool sameKeyset(T1* lhs, T2* rhs) {
}
return good;
}
// A simple constant-width bitset holding indices in the range [0, N).
//
// The bits are stored in a single uint16_t, so N can be at most 16 (checked by the
// static_assert below). Iterating over the set (for example with a range-based for loop)
// yields the indices of the bits that are set, in increasing order.
template <int N> struct BitSet {
    uint16_t bits = 0;
    static_assert(N <= 8 * sizeof(bits), "BitSet<N> is backed by a single uint16_t");

    // Forward iterator over the indices of the set bits.
    // A position of N is the past-the-end value.
    struct iterator {
        const BitSet& set;
        int cur;

        iterator(const BitSet& set, int cur) : set(set), cur(cur) {}

        // Returns the index this iterator is positioned on; must not be called on end().
        int operator*() const {
            assert(cur >= 0 && cur < N);
            return cur;
        }

        // Only the positions are compared; the referenced sets are not.
        bool operator==(const iterator& rhs) const { return cur == rhs.cur; }
        bool operator!=(const iterator& rhs) const { return !(*this == rhs); }

        // Moves to the next set bit after the current position, or to N if there is none.
        iterator& operator++() {
            // TODO: this function (and begin()) could be optimized using __builtin_ctz
            assert(cur >= 0 && cur < N);
            cur = firstSetBitFrom(set.bits, cur + 1);
            assert(cur > 0 && cur <= N);
            return *this;
        }
    };

    // Adds idx to the set (no-op if it is already present).
    void set(int idx) {
        assert(idx >= 0 && idx < N);
        bits |= static_cast<uint16_t>(1u << idx);
    }

    // Removes idx from the set (no-op if it is not present).
    void clear(int idx) {
        assert(idx >= 0 && idx < N);
        bits &= static_cast<uint16_t>(~(1u << idx));
    }

    // Iterator positioned on the lowest set bit, or end() if the set is empty.
    // Every bit position is examined here: testing only bit 0 would make any set
    // that lacks index 0 look empty to range-based for loops.
    iterator begin() const { return iterator(*this, firstSetBitFrom(bits, 0)); }

    // Past-the-end iterator.
    iterator end() const { return iterator(*this, N); }

private:
    // Returns the smallest index >= start whose bit is set in `value`, or N if there is none.
    static int firstSetBitFrom(uint16_t value, int start) {
        for (int i = start; i < N; i++) {
            if ((value >> i) & 1)
                return i;
        }
        return N;
    }
};
}
#endif
......@@ -273,9 +273,8 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R
SpillMap _spill_map;
PatchpointInitializationInfo initialization_info
= initializePatchpoint3(func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */,
std::unordered_set<int>(), _spill_map);
PatchpointInitializationInfo initialization_info = initializePatchpoint3(
func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */, LiveOutSet(), _spill_map);
assert(_spill_map.size() == 0);
assert(initialization_info.slowpath_start == pp_start + patchable_size);
assert(initialization_info.slowpath_rtn_addr == pp_end);
......@@ -283,7 +282,7 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R
StackInfo stack_info(SCRATCH_BYTES, 0);
icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(),
stack_info, std::unordered_set<int>());
stack_info, LiveOutSet());
assembler::Assembler prologue_assem((uint8_t*)addr, PROLOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment