Commit 3796b9f7 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #829 from kmod/perf4

Save the key hash in dictionaries
parents 1051326f 74a42c5e
......@@ -1837,7 +1837,7 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo
assert(clfunc->source->scoping->areGlobalsFromModule());
interpreter.setGlobals(source_info->parent_module);
for (const auto& p : frame_state.locals->d) {
for (const auto& p : *frame_state.locals) {
assert(p.first->cls == str_cls);
auto name = static_cast<BoxedString*>(p.first)->s();
if (name == PASSED_GENERATOR_NAME) {
......
......@@ -1061,7 +1061,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
// TODO Right now d just has all the python variables that are *initialized*
// But we also need to loop through all the uninitialized variables that we have
// access to and delete them from the locals dict
for (const auto& p : d->d) {
for (const auto& p : *d) {
Box* varname = p.first;
Box* value = p.second;
setitem(frame_info->boxedLocals, varname, value);
......
......@@ -561,7 +561,7 @@ void setupSys() {
void setupSysEnd() {
std::vector<Box*, StlCompatAllocator<Box*>> builtin_module_names;
for (auto& p : sys_modules_dict->d) {
for (const auto& p : *sys_modules_dict) {
builtin_module_names.push_back(p.first);
}
......
......@@ -102,7 +102,7 @@ Box* classobjNew(Box* _cls, Box* _name, Box* _bases, Box** _args) {
made->giveAttr("__module__", boxString(getCurrentModule()->name()));
made->giveAttr("__doc__", None);
for (auto& p : dict->d) {
for (const auto& p : *dict) {
RELEASE_ASSERT(p.first->cls == str_cls, "");
BoxedString* s = (BoxedString*)p.first;
internStringMortalInplace(s);
......
......@@ -50,7 +50,7 @@ Box* dictRepr(BoxedDict* self) {
try {
chars.push_back('{');
bool first = true;
for (const auto& p : self->d) {
for (const auto& p : *self) {
if (!first) {
chars.push_back(',');
chars.push_back(' ');
......@@ -93,7 +93,7 @@ Box* dictItems(BoxedDict* self) {
BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size());
for (const auto& p : self->d) {
for (const auto& p : *self) {
BoxedTuple* t = BoxedTuple::create({ p.first, p.second });
listAppendInternal(rtn, t);
}
......@@ -104,7 +104,7 @@ Box* dictItems(BoxedDict* self) {
Box* dictValues(BoxedDict* self) {
BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size());
for (const auto& p : self->d) {
for (const auto& p : *self) {
listAppendInternal(rtn, p.second);
}
return rtn;
......@@ -115,7 +115,7 @@ Box* dictKeys(BoxedDict* self) {
BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size());
for (const auto& p : self->d) {
for (const auto& p : *self) {
listAppendInternal(rtn, p.first);
}
return rtn;
......@@ -357,7 +357,7 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb
return 0;
}
*pkey = (*it)->first;
*pkey = (*it)->first.value;
*pvalue = (*it)->second;
++(*it);
......@@ -470,7 +470,7 @@ Box* dictPopitem(BoxedDict* self) {
raiseExcHelper(KeyError, "popitem(): dictionary is empty");
}
Box* key = it->first;
Box* key = it->first.value;
Box* value = it->second;
self->d.erase(it);
......@@ -563,7 +563,7 @@ Box* dictEq(BoxedDict* self, Box* _rhs) {
if (self->d.size() != rhs->d.size())
return False;
for (const auto& p : self->d) {
for (const auto& p : *self) {
auto it = rhs->d.find(p.first);
if (it == rhs->d.end())
return False;
......@@ -708,7 +708,7 @@ void BoxedDict::gcHandler(GCVisitor* v, Box* b) {
BoxedDict* d = (BoxedDict*)b;
for (auto p : d->d) {
for (auto p : *d) {
v->visit(p.first);
v->visit(p.second);
}
......
......@@ -65,11 +65,11 @@ Box* dictIterNext(Box* s) {
Box* rtn = nullptr;
if (self->type == BoxedDictIterator::KeyIterator) {
rtn = self->it->first;
rtn = self->it->first.value;
} else if (self->type == BoxedDictIterator::ValueIterator) {
rtn = self->it->second;
} else if (self->type == BoxedDictIterator::ItemIterator) {
rtn = BoxedTuple::create({ self->it->first, self->it->second });
rtn = BoxedTuple::create({ self->it->first.value, self->it->second });
}
++self->it;
return rtn;
......
......@@ -3335,7 +3335,7 @@ void rearrangeArguments(ParamReceiveSpec paramspec, const ParamNames* param_name
assert(PyDict_Check(kwargs));
BoxedDict* d_kwargs = static_cast<BoxedDict*>(kwargs);
for (auto& p : d_kwargs->d) {
for (const auto& p : *d_kwargs) {
auto k = coerceUnicodeToStr(p.first);
if (k->cls != str_cls)
......@@ -5303,7 +5303,7 @@ Box* _typeNew(BoxedClass* metatype, BoxedString* name, BoxedTuple* bases, BoxedD
made->setattr(dict_str, dict_descr, NULL);
}
for (const auto& p : attr_dict->d) {
for (const auto& p : *attr_dict) {
auto k = coerceUnicodeToStr(p.first);
RELEASE_ASSERT(k->cls == str_cls, "");
......
......@@ -350,7 +350,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
} else {
assert(kwargs_sz);
auto const seq = *(kwargs->d.begin());
auto const kw = static_cast<BoxedString*>(seq.first);
auto const kw = static_cast<BoxedString*>(seq.first.value);
if (kw->s() == "sequence")
elements = seq.second;
......
......@@ -2380,7 +2380,7 @@ public:
assert(PyDict_Check(_container));
BoxedDict* container = static_cast<BoxedDict*>(_container);
for (const auto& p : container->d) {
for (const auto& p : *container) {
AttrWrapper::setitem(self, p.first, p.second);
}
}
......
......@@ -659,22 +659,36 @@ struct PyLt {
bool operator()(Box*, Box*) const;
};
struct PythonLevelEq {
static bool isEqual(Box* lhs, Box* rhs) {
if (lhs == rhs)
return true;
if (rhs == getEmptyKey() || rhs == getTombstoneKey())
return false;
return PyEq()(lhs, rhs);
}
static Box* getEmptyKey() { return (Box*)-1; }
static Box* getTombstoneKey() { return (Box*)-2; }
static unsigned getHashValue(Box* val) { return PyHasher()(val); }
};
class BoxedDict : public Box {
public:
typedef llvm::DenseMap<Box*, Box*, PythonLevelEq> DictMap;
// llvm::DenseMap doesn't store the original hash values, choosing to instead
// check for equality more often. This is probably a good tradeoff when the keys
// are pointers and comparison is cheap, but we want to make sure that keys with
// different hash values don't get compared.
struct BoxAndHash {
Box* value;
size_t hash;
BoxAndHash(Box* value) : value(value), hash(PyHasher()(value)) {}
BoxAndHash(Box* value, size_t hash) : value(value), hash(hash) {}
};
struct Comparisons {
static bool isEqual(BoxAndHash lhs, BoxAndHash rhs) {
if (lhs.value == rhs.value)
return true;
if (rhs.value == (Box*)-1 || rhs.value == (Box*)-2)
return false;
if (lhs.hash != rhs.hash)
return false;
return PyEq()(lhs.value, rhs.value);
}
static BoxAndHash getEmptyKey() { return BoxAndHash((Box*)-1, 0); }
static BoxAndHash getTombstoneKey() { return BoxAndHash((Box*)-2, 0); }
static unsigned getHashValue(BoxAndHash val) { return val.hash; }
};
typedef llvm::DenseMap<BoxAndHash, Box*, Comparisons> DictMap;
DictMap d;
......@@ -683,12 +697,32 @@ public:
DEFAULT_CLASS_SIMPLE(dict_cls);
Box* getOrNull(Box* k) {
const auto& p = d.find(k);
const auto& p = d.find(BoxAndHash(k));
if (p != d.end())
return p->second;
return NULL;
}
class iterator {
private:
DictMap::iterator it;
public:
iterator(DictMap::iterator it) : it(std::move(it)) {}
bool operator!=(const iterator& rhs) const { return it != rhs.it; }
bool operator==(const iterator& rhs) const { return it == rhs.it; }
iterator& operator++() {
++it;
return *this;
}
std::pair<Box*, Box*> operator*() const { return std::make_pair(it->first.value, it->second); }
Box* first() const { return it->first.value; }
};
iterator begin() { return iterator(d.begin()); }
iterator end() { return iterator(d.end()); }
static void gcHandler(GCVisitor* v, Box* b);
};
static_assert(sizeof(BoxedDict) == sizeof(PyDictObject), "");
......
......@@ -225,7 +225,7 @@ extern "C" void dumpEx(void* p, int levels) {
if (levels > 0) {
int i = 0;
for (auto t : d->d) {
for (auto t : *d) {
printf("\nKey:");
dumpEx(t.first, levels - 1);
printf("Value:");
......
......@@ -38,3 +38,20 @@ nan = float('nan')
d[nan] = "hello world"
print d[nan]
# Dicts should not check __eq__ for values that have different hash values,
# even if they internally cause a hash collision.
class C(int):
def __eq__(self, rhs):
print "eq", self, rhs
raise Exception("Error, should not call __eq__!")
def __hash__(self):
print "hash", self
return self
d = {}
for i in xrange(1000):
d[C(i)] = i
print len(d)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment