Commit 74a42c5e authored by Kevin Modzelewski's avatar Kevin Modzelewski

Save the key hash in dictionaries

This lets us avoid checking equality if the hash values didn't match.
Checking equality can involve calling into user code, so it can be both
a perf drain and a noticeable behavioral difference.
parent b6e726c0
......@@ -1825,7 +1825,7 @@ Box* astInterpretDeopt(CLFunction* clfunc, AST_expr* after_expr, AST_stmt* enclo
assert(clfunc->source->scoping->areGlobalsFromModule());
interpreter.setGlobals(source_info->parent_module);
for (const auto& p : frame_state.locals->d) {
for (const auto& p : *frame_state.locals) {
assert(p.first->cls == str_cls);
auto name = static_cast<BoxedString*>(p.first)->s();
if (name == PASSED_GENERATOR_NAME) {
......
......@@ -1061,7 +1061,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
// TODO Right now d just has all the python variables that are *initialized*
// But we also need to loop through all the uninitialized variables that we have
// access to and delete them from the locals dict
for (const auto& p : d->d) {
for (const auto& p : *d) {
Box* varname = p.first;
Box* value = p.second;
setitem(frame_info->boxedLocals, varname, value);
......
......@@ -561,7 +561,7 @@ void setupSys() {
void setupSysEnd() {
std::vector<Box*, StlCompatAllocator<Box*>> builtin_module_names;
for (auto& p : sys_modules_dict->d) {
for (const auto& p : *sys_modules_dict) {
builtin_module_names.push_back(p.first);
}
......
......@@ -102,7 +102,7 @@ Box* classobjNew(Box* _cls, Box* _name, Box* _bases, Box** _args) {
made->giveAttr("__module__", boxString(getCurrentModule()->name()));
made->giveAttr("__doc__", None);
for (auto& p : dict->d) {
for (const auto& p : *dict) {
RELEASE_ASSERT(p.first->cls == str_cls, "");
BoxedString* s = (BoxedString*)p.first;
internStringMortalInplace(s);
......
......@@ -50,7 +50,7 @@ Box* dictRepr(BoxedDict* self) {
try {
chars.push_back('{');
bool first = true;
for (const auto& p : self->d) {
for (const auto& p : *self) {
if (!first) {
chars.push_back(',');
chars.push_back(' ');
......@@ -93,7 +93,7 @@ Box* dictItems(BoxedDict* self) {
BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size());
for (const auto& p : self->d) {
for (const auto& p : *self) {
BoxedTuple* t = BoxedTuple::create({ p.first, p.second });
listAppendInternal(rtn, t);
}
......@@ -104,7 +104,7 @@ Box* dictItems(BoxedDict* self) {
Box* dictValues(BoxedDict* self) {
BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size());
for (const auto& p : self->d) {
for (const auto& p : *self) {
listAppendInternal(rtn, p.second);
}
return rtn;
......@@ -115,7 +115,7 @@ Box* dictKeys(BoxedDict* self) {
BoxedList* rtn = new BoxedList();
rtn->ensure(self->d.size());
for (const auto& p : self->d) {
for (const auto& p : *self) {
listAppendInternal(rtn, p.first);
}
return rtn;
......@@ -357,7 +357,7 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb
return 0;
}
*pkey = (*it)->first;
*pkey = (*it)->first.value;
*pvalue = (*it)->second;
++(*it);
......@@ -470,7 +470,7 @@ Box* dictPopitem(BoxedDict* self) {
raiseExcHelper(KeyError, "popitem(): dictionary is empty");
}
Box* key = it->first;
Box* key = it->first.value;
Box* value = it->second;
self->d.erase(it);
......@@ -563,7 +563,7 @@ Box* dictEq(BoxedDict* self, Box* _rhs) {
if (self->d.size() != rhs->d.size())
return False;
for (const auto& p : self->d) {
for (const auto& p : *self) {
auto it = rhs->d.find(p.first);
if (it == rhs->d.end())
return False;
......@@ -708,7 +708,7 @@ void BoxedDict::gcHandler(GCVisitor* v, Box* b) {
BoxedDict* d = (BoxedDict*)b;
for (auto p : d->d) {
for (auto p : *d) {
v->visit(p.first);
v->visit(p.second);
}
......
......@@ -65,11 +65,11 @@ Box* dictIterNext(Box* s) {
Box* rtn = nullptr;
if (self->type == BoxedDictIterator::KeyIterator) {
rtn = self->it->first;
rtn = self->it->first.value;
} else if (self->type == BoxedDictIterator::ValueIterator) {
rtn = self->it->second;
} else if (self->type == BoxedDictIterator::ItemIterator) {
rtn = BoxedTuple::create({ self->it->first, self->it->second });
rtn = BoxedTuple::create({ self->it->first.value, self->it->second });
}
++self->it;
return rtn;
......
......@@ -3335,7 +3335,7 @@ void rearrangeArguments(ParamReceiveSpec paramspec, const ParamNames* param_name
assert(PyDict_Check(kwargs));
BoxedDict* d_kwargs = static_cast<BoxedDict*>(kwargs);
for (auto& p : d_kwargs->d) {
for (const auto& p : *d_kwargs) {
auto k = coerceUnicodeToStr(p.first);
if (k->cls != str_cls)
......@@ -5303,7 +5303,7 @@ Box* _typeNew(BoxedClass* metatype, BoxedString* name, BoxedTuple* bases, BoxedD
made->setattr(dict_str, dict_descr, NULL);
}
for (const auto& p : attr_dict->d) {
for (const auto& p : *attr_dict) {
auto k = coerceUnicodeToStr(p.first);
RELEASE_ASSERT(k->cls == str_cls, "");
......
......@@ -350,7 +350,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
} else {
assert(kwargs_sz);
auto const seq = *(kwargs->d.begin());
auto const kw = static_cast<BoxedString*>(seq.first);
auto const kw = static_cast<BoxedString*>(seq.first.value);
if (kw->s() == "sequence")
elements = seq.second;
......
......@@ -2380,7 +2380,7 @@ public:
assert(PyDict_Check(_container));
BoxedDict* container = static_cast<BoxedDict*>(_container);
for (const auto& p : container->d) {
for (const auto& p : *container) {
AttrWrapper::setitem(self, p.first, p.second);
}
}
......
......@@ -659,22 +659,36 @@ struct PyLt {
bool operator()(Box*, Box*) const;
};
struct PythonLevelEq {
static bool isEqual(Box* lhs, Box* rhs) {
if (lhs == rhs)
return true;
if (rhs == getEmptyKey() || rhs == getTombstoneKey())
return false;
return PyEq()(lhs, rhs);
}
static Box* getEmptyKey() { return (Box*)-1; }
static Box* getTombstoneKey() { return (Box*)-2; }
static unsigned getHashValue(Box* val) { return PyHasher()(val); }
};
class BoxedDict : public Box {
public:
typedef llvm::DenseMap<Box*, Box*, PythonLevelEq> DictMap;
// llvm::DenseMap doesn't store the original hash values, choosing to instead
// check for equality more often. This is probably a good tradeoff when the keys
// are pointers and comparison is cheap, but we want to make sure that keys with
// different hash values don't get compared.
struct BoxAndHash {
Box* value;
size_t hash;
BoxAndHash(Box* value) : value(value), hash(PyHasher()(value)) {}
BoxAndHash(Box* value, size_t hash) : value(value), hash(hash) {}
};
struct Comparisons {
static bool isEqual(BoxAndHash lhs, BoxAndHash rhs) {
if (lhs.value == rhs.value)
return true;
if (rhs.value == (Box*)-1 || rhs.value == (Box*)-2)
return false;
if (lhs.hash != rhs.hash)
return false;
return PyEq()(lhs.value, rhs.value);
}
static BoxAndHash getEmptyKey() { return BoxAndHash((Box*)-1, 0); }
static BoxAndHash getTombstoneKey() { return BoxAndHash((Box*)-2, 0); }
static unsigned getHashValue(BoxAndHash val) { return val.hash; }
};
typedef llvm::DenseMap<BoxAndHash, Box*, Comparisons> DictMap;
DictMap d;
......@@ -683,12 +697,32 @@ public:
DEFAULT_CLASS_SIMPLE(dict_cls);
Box* getOrNull(Box* k) {
const auto& p = d.find(k);
const auto& p = d.find(BoxAndHash(k));
if (p != d.end())
return p->second;
return NULL;
}
class iterator {
private:
DictMap::iterator it;
public:
iterator(DictMap::iterator it) : it(std::move(it)) {}
bool operator!=(const iterator& rhs) const { return it != rhs.it; }
bool operator==(const iterator& rhs) const { return it == rhs.it; }
iterator& operator++() {
++it;
return *this;
}
std::pair<Box*, Box*> operator*() const { return std::make_pair(it->first.value, it->second); }
Box* first() const { return it->first.value; }
};
iterator begin() { return iterator(d.begin()); }
iterator end() { return iterator(d.end()); }
static void gcHandler(GCVisitor* v, Box* b);
};
static_assert(sizeof(BoxedDict) == sizeof(PyDictObject), "");
......
......@@ -225,7 +225,7 @@ extern "C" void dumpEx(void* p, int levels) {
if (levels > 0) {
int i = 0;
for (auto t : d->d) {
for (auto t : *d) {
printf("\nKey:");
dumpEx(t.first, levels - 1);
printf("Value:");
......
......@@ -38,3 +38,20 @@ nan = float('nan')
d[nan] = "hello world"
print d[nan]
# Dicts should not check __eq__ for values that have different hash values,
# even if they internally cause a hash collision.
class C(int):
def __eq__(self, rhs):
print "eq", self, rhs
raise Exception("Error, should not call __eq__!")
def __hash__(self):
print "hash", self
return self
d = {}
for i in xrange(1000):
d[C(i)] = i
print len(d)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment