Commit 74a42c5e authored by Kevin Modzelewski's avatar Kevin Modzelewski

Save the key hash in dictionaries

This lets us avoid checking equality if the hash values didn't match.
Checking equality can involve calling into user code, so it can be both
a perf drain and a noticeable behavioral difference.
parent b6e726c0
...@@ -1825,7 +1825,7 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo ...@@ -1825,7 +1825,7 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo
assert(clfunc->source->scoping->areGlobalsFromModule()); assert(clfunc->source->scoping->areGlobalsFromModule());
interpreter.setGlobals(source_info->parent_module); interpreter.setGlobals(source_info->parent_module);
for (const auto& p : frame_state.locals->d) { for (const auto& p : *frame_state.locals) {
assert(p.first->cls == str_cls); assert(p.first->cls == str_cls);
auto name = static_cast<BoxedString*>(p.first)->s(); auto name = static_cast<BoxedString*>(p.first)->s();
if (name == PASSED_GENERATOR_NAME) { if (name == PASSED_GENERATOR_NAME) {
......
...@@ -1061,7 +1061,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() { ...@@ -1061,7 +1061,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
// TODO Right now d just has all the python variables that are *initialized* // TODO Right now d just has all the python variables that are *initialized*
// But we also need to loop through all the uninitialized variables that we have // But we also need to loop through all the uninitialized variables that we have
// access to and delete them from the locals dict // access to and delete them from the locals dict
for (const auto& p : d->d) { for (const auto& p : *d) {
Box* varname = p.first; Box* varname = p.first;
Box* value = p.second; Box* value = p.second;
setitem(frame_info->boxedLocals, varname, value); setitem(frame_info->boxedLocals, varname, value);
......
...@@ -561,7 +561,7 @@ void setupSys() { ...@@ -561,7 +561,7 @@ void setupSys() {
void setupSysEnd() { void setupSysEnd() {
std::vector<Box*, StlCompatAllocator<Box*>> builtin_module_names; std::vector<Box*, StlCompatAllocator<Box*>> builtin_module_names;
for (auto& p : sys_modules_dict->d) { for (const auto& p : *sys_modules_dict) {
builtin_module_names.push_back(p.first); builtin_module_names.push_back(p.first);
} }
......
...@@ -102,7 +102,7 @@ Box* classobjNew(Box* _cls, Box* _name, Box* _bases, Box** _args) { ...@@ -102,7 +102,7 @@ Box* classobjNew(Box* _cls, Box* _name, Box* _bases, Box** _args) {
made->giveAttr("__module__", boxString(getCurrentModule()->name())); made->giveAttr("__module__", boxString(getCurrentModule()->name()));
made->giveAttr("__doc__", None); made->giveAttr("__doc__", None);
for (auto& p : dict->d) { for (const auto& p : *dict) {
RELEASE_ASSERT(p.first->cls == str_cls, ""); RELEASE_ASSERT(p.first->cls == str_cls, "");
BoxedString* s = (BoxedString*)p.first; BoxedString* s = (BoxedString*)p.first;
internStringMortalInplace(s); internStringMortalInplace(s);
......
...@@ -50,7 +50,7 @@ Box* dictRepr(BoxedDict* self) { ...@@ -50,7 +50,7 @@ Box* dictRepr(BoxedDict* self) {
try { try {
chars.push_back('{'); chars.push_back('{');
bool first = true; bool first = true;
for (const auto& p : self->d) { for (const auto& p : *self) {
if (!first) { if (!first) {
chars.push_back(','); chars.push_back(',');
chars.push_back(' '); chars.push_back(' ');
...@@ -93,7 +93,7 @@ Box* dictItems(BoxedDict* self) { ...@@ -93,7 +93,7 @@ Box* dictItems(BoxedDict* self) {
BoxedList* rtn = new BoxedList(); BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size()); rtn->ensure(self->d.size());
for (const auto& p : self->d) { for (const auto& p : *self) {
BoxedTuple* t = BoxedTuple::create({ p.first, p.second }); BoxedTuple* t = BoxedTuple::create({ p.first, p.second });
listAppendInternal(rtn, t); listAppendInternal(rtn, t);
} }
...@@ -104,7 +104,7 @@ Box* dictItems(BoxedDict* self) { ...@@ -104,7 +104,7 @@ Box* dictItems(BoxedDict* self) {
Box* dictValues(BoxedDict* self) { Box* dictValues(BoxedDict* self) {
BoxedList* rtn = new BoxedList(); BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size()); rtn->ensure(self->d.size());
for (const auto& p : self->d) { for (const auto& p : *self) {
listAppendInternal(rtn, p.second); listAppendInternal(rtn, p.second);
} }
return rtn; return rtn;
...@@ -115,7 +115,7 @@ Box* dictKeys(BoxedDict* self) { ...@@ -115,7 +115,7 @@ Box* dictKeys(BoxedDict* self) {
BoxedList* rtn = new BoxedList(); BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size()); rtn->ensure(self->d.size());
for (const auto& p : self->d) { for (const auto& p : *self) {
listAppendInternal(rtn, p.first); listAppendInternal(rtn, p.first);
} }
return rtn; return rtn;
...@@ -357,7 +357,7 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb ...@@ -357,7 +357,7 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb
return 0; return 0;
} }
*pkey = (*it)->first; *pkey = (*it)->first.value;
*pvalue = (*it)->second; *pvalue = (*it)->second;
++(*it); ++(*it);
...@@ -470,7 +470,7 @@ Box* dictPopitem(BoxedDict* self) { ...@@ -470,7 +470,7 @@ Box* dictPopitem(BoxedDict* self) {
raiseExcHelper(KeyError, "popitem(): dictionary is empty"); raiseExcHelper(KeyError, "popitem(): dictionary is empty");
} }
Box* key = it->first; Box* key = it->first.value;
Box* value = it->second; Box* value = it->second;
self->d.erase(it); self->d.erase(it);
...@@ -563,7 +563,7 @@ Box* dictEq(BoxedDict* self, Box* _rhs) { ...@@ -563,7 +563,7 @@ Box* dictEq(BoxedDict* self, Box* _rhs) {
if (self->d.size() != rhs->d.size()) if (self->d.size() != rhs->d.size())
return False; return False;
for (const auto& p : self->d) { for (const auto& p : *self) {
auto it = rhs->d.find(p.first); auto it = rhs->d.find(p.first);
if (it == rhs->d.end()) if (it == rhs->d.end())
return False; return False;
...@@ -708,7 +708,7 @@ void BoxedDict::gcHandler(GCVisitor* v, Box* b) { ...@@ -708,7 +708,7 @@ void BoxedDict::gcHandler(GCVisitor* v, Box* b) {
BoxedDict* d = (BoxedDict*)b; BoxedDict* d = (BoxedDict*)b;
for (auto p : d->d) { for (auto p : *d) {
v->visit(p.first); v->visit(p.first);
v->visit(p.second); v->visit(p.second);
} }
......
...@@ -65,11 +65,11 @@ Box* dictIterNext(Box* s) { ...@@ -65,11 +65,11 @@ Box* dictIterNext(Box* s) {
Box* rtn = nullptr; Box* rtn = nullptr;
if (self->type == BoxedDictIterator::KeyIterator) { if (self->type == BoxedDictIterator::KeyIterator) {
rtn = self->it->first; rtn = self->it->first.value;
} else if (self->type == BoxedDictIterator::ValueIterator) { } else if (self->type == BoxedDictIterator::ValueIterator) {
rtn = self->it->second; rtn = self->it->second;
} else if (self->type == BoxedDictIterator::ItemIterator) { } else if (self->type == BoxedDictIterator::ItemIterator) {
rtn = BoxedTuple::create({ self->it->first, self->it->second }); rtn = BoxedTuple::create({ self->it->first.value, self->it->second });
} }
++self->it; ++self->it;
return rtn; return rtn;
......
...@@ -3335,7 +3335,7 @@ void rearrangeArguments(ParamReceiveSpec paramspec, const ParamNames* param_name ...@@ -3335,7 +3335,7 @@ void rearrangeArguments(ParamReceiveSpec paramspec, const ParamNames* param_name
assert(PyDict_Check(kwargs)); assert(PyDict_Check(kwargs));
BoxedDict* d_kwargs = static_cast<BoxedDict*>(kwargs); BoxedDict* d_kwargs = static_cast<BoxedDict*>(kwargs);
for (auto& p : d_kwargs->d) { for (const auto& p : *d_kwargs) {
auto k = coerceUnicodeToStr(p.first); auto k = coerceUnicodeToStr(p.first);
if (k->cls != str_cls) if (k->cls != str_cls)
...@@ -5303,7 +5303,7 @@ Box* _typeNew(BoxedClass* metatype, BoxedString* name, BoxedTuple* bases, BoxedD ...@@ -5303,7 +5303,7 @@ Box* _typeNew(BoxedClass* metatype, BoxedString* name, BoxedTuple* bases, BoxedD
made->setattr(dict_str, dict_descr, NULL); made->setattr(dict_str, dict_descr, NULL);
} }
for (const auto& p : attr_dict->d) { for (const auto& p : *attr_dict) {
auto k = coerceUnicodeToStr(p.first); auto k = coerceUnicodeToStr(p.first);
RELEASE_ASSERT(k->cls == str_cls, ""); RELEASE_ASSERT(k->cls == str_cls, "");
......
...@@ -350,7 +350,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) { ...@@ -350,7 +350,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
} else { } else {
assert(kwargs_sz); assert(kwargs_sz);
auto const seq = *(kwargs->d.begin()); auto const seq = *(kwargs->d.begin());
auto const kw = static_cast<BoxedString*>(seq.first); auto const kw = static_cast<BoxedString*>(seq.first.value);
if (kw->s() == "sequence") if (kw->s() == "sequence")
elements = seq.second; elements = seq.second;
......
...@@ -2380,7 +2380,7 @@ public: ...@@ -2380,7 +2380,7 @@ public:
assert(PyDict_Check(_container)); assert(PyDict_Check(_container));
BoxedDict* container = static_cast<BoxedDict*>(_container); BoxedDict* container = static_cast<BoxedDict*>(_container);
for (const auto& p : container->d) { for (const auto& p : *container) {
AttrWrapper::setitem(self, p.first, p.second); AttrWrapper::setitem(self, p.first, p.second);
} }
} }
......
...@@ -659,22 +659,36 @@ struct PyLt { ...@@ -659,22 +659,36 @@ struct PyLt {
bool operator()(Box*, Box*) const; bool operator()(Box*, Box*) const;
}; };
struct PythonLevelEq { class BoxedDict : public Box {
static bool isEqual(Box* lhs, Box* rhs) { public:
if (lhs == rhs) // llvm::DenseMap doesn't store the original hash values, choosing to instead
// check for equality more often. This is probably a good tradeoff when the keys
// are pointers and comparison is cheap, but we want to make sure that keys with
// different hash values don't get compared.
struct BoxAndHash {
Box* value;
size_t hash;
BoxAndHash(Box* value) : value(value), hash(PyHasher()(value)) {}
BoxAndHash(Box* value, size_t hash) : value(value), hash(hash) {}
};
struct Comparisons {
static bool isEqual(BoxAndHash lhs, BoxAndHash rhs) {
if (lhs.value == rhs.value)
return true; return true;
if (rhs == getEmptyKey() || rhs == getTombstoneKey()) if (rhs.value == (Box*)-1 || rhs.value == (Box*)-2)
return false; return false;
return PyEq()(lhs, rhs); if (lhs.hash != rhs.hash)
return false;
return PyEq()(lhs.value, rhs.value);
} }
static Box* getEmptyKey() { return (Box*)-1; } static BoxAndHash getEmptyKey() { return BoxAndHash((Box*)-1, 0); }
static Box* getTombstoneKey() { return (Box*)-2; } static BoxAndHash getTombstoneKey() { return BoxAndHash((Box*)-2, 0); }
static unsigned getHashValue(Box* val) { return PyHasher()(val); } static unsigned getHashValue(BoxAndHash val) { return val.hash; }
}; };
class BoxedDict : public Box { typedef llvm::DenseMap<BoxAndHash, Box*, Comparisons> DictMap;
public:
typedef llvm::DenseMap<Box*, Box*, PythonLevelEq> DictMap;
DictMap d; DictMap d;
...@@ -683,12 +697,32 @@ public: ...@@ -683,12 +697,32 @@ public:
DEFAULT_CLASS_SIMPLE(dict_cls); DEFAULT_CLASS_SIMPLE(dict_cls);
Box* getOrNull(Box* k) { Box* getOrNull(Box* k) {
const auto& p = d.find(k); const auto& p = d.find(BoxAndHash(k));
if (p != d.end()) if (p != d.end())
return p->second; return p->second;
return NULL; return NULL;
} }
class iterator {
private:
DictMap::iterator it;
public:
iterator(DictMap::iterator it) : it(std::move(it)) {}
bool operator!=(const iterator& rhs) const { return it != rhs.it; }
bool operator==(const iterator& rhs) const { return it == rhs.it; }
iterator& operator++() {
++it;
return *this;
}
std::pair<Box*, Box*> operator*() const { return std::make_pair(it->first.value, it->second); }
Box* first() const { return it->first.value; }
};
iterator begin() { return iterator(d.begin()); }
iterator end() { return iterator(d.end()); }
static void gcHandler(GCVisitor* v, Box* b); static void gcHandler(GCVisitor* v, Box* b);
}; };
static_assert(sizeof(BoxedDict) == sizeof(PyDictObject), ""); static_assert(sizeof(BoxedDict) == sizeof(PyDictObject), "");
......
...@@ -225,7 +225,7 @@ extern "C" void dumpEx(void* p, int levels) { ...@@ -225,7 +225,7 @@ extern "C" void dumpEx(void* p, int levels) {
if (levels > 0) { if (levels > 0) {
int i = 0; int i = 0;
for (auto t : d->d) { for (auto t : *d) {
printf("\nKey:"); printf("\nKey:");
dumpEx(t.first, levels - 1); dumpEx(t.first, levels - 1);
printf("Value:"); printf("Value:");
......
...@@ -38,3 +38,20 @@ nan = float('nan') ...@@ -38,3 +38,20 @@ nan = float('nan')
d[nan] = "hello world" d[nan] = "hello world"
print d[nan] print d[nan]
# Dicts should not check __eq__ for values that have different hash values,
# even if they internally cause a hash collision.
class C(int):
def __eq__(self, rhs):
print "eq", self, rhs
raise Exception("Error, should not call __eq__!")
def __hash__(self):
print "hash", self
return self
d = {}
for i in xrange(1000):
d[C(i)] = i
print len(d)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment