Skip to content

Commit 8e211b1

Browse files
authored
gh-141510: Optimize hash(frozendict) (#144919)
hash(frozendict) no longer creates a temporary items view and a temporary frozenset object. Copy frozenset_hash() code to frozendict_hash().
1 parent 6577d87 commit 8e211b1

File tree

3 files changed: 52 additions and 18 deletions.

Lib/test/test_dict.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1775,6 +1775,15 @@ class MyFrozenDict(frozendict):
17751775
d = MyFrozenDict(x=1, y=2)
17761776
self.assertEqual(repr(d), "MyFrozenDict({'x': 1, 'y': 2})")
17771777

1778+
def test_hash(self):
1779+
# hash() doesn't rely on the items order
1780+
self.assertEqual(hash(frozendict(x=1, y=2)),
1781+
hash(frozendict(y=2, x=1)))
1782+
1783+
fd = frozendict(x=[1], y=[2])
1784+
with self.assertRaisesRegex(TypeError, "unhashable type: 'list'"):
1785+
hash(fd)
1786+
17781787

17791788
if __name__ == "__main__":
17801789
unittest.main()

Objects/dictobject.c

Lines changed: 39 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -7881,33 +7881,55 @@ frozendict_repr(PyObject *self)
78817881
return res;
78827882
}
78837883

7884+
static Py_uhash_t
7885+
_shuffle_bits(Py_uhash_t h)
7886+
{
7887+
return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
7888+
}
7889+
7890+
// Code copied from frozenset_hash()
78847891
static Py_hash_t
78857892
frozendict_hash(PyObject *op)
78867893
{
78877894
PyFrozenDictObject *self = _PyFrozenDictObject_CAST(op);
7888-
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ma_hash);
7889-
if (hash != -1) {
7890-
return hash;
7895+
Py_hash_t shash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ma_hash);
7896+
if (shash != -1) {
7897+
return shash;
78917898
}
78927899

7893-
PyObject *items = _PyDictView_New(op, &PyDictItems_Type);
7894-
if (items == NULL) {
7895-
return -1;
7896-
}
7897-
PyObject *frozenset = PyFrozenSet_New(items);
7898-
Py_DECREF(items);
7899-
if (frozenset == NULL) {
7900-
return -1;
7900+
PyDictObject *mp = _PyAnyDict_CAST(op);
7901+
Py_uhash_t hash = 0;
7902+
7903+
PyObject *key, *value; // borrowed refs
7904+
Py_ssize_t pos = 0;
7905+
while (PyDict_Next(op, &pos, &key, &value)) {
7906+
Py_hash_t key_hash = PyObject_Hash(key);
7907+
if (key_hash == -1) {
7908+
return -1;
7909+
}
7910+
hash ^= _shuffle_bits(key_hash);
7911+
7912+
Py_hash_t value_hash = PyObject_Hash(value);
7913+
if (value_hash == -1) {
7914+
return -1;
7915+
}
7916+
hash ^= _shuffle_bits(value_hash);
79017917
}
79027918

7903-
hash = PyObject_Hash(frozenset);
7904-
Py_DECREF(frozenset);
7905-
if (hash == -1) {
7906-
return -1;
7919+
/* Factor in the number of active entries */
7920+
hash ^= ((Py_uhash_t)mp->ma_used + 1) * 1927868237UL;
7921+
7922+
/* Disperse patterns arising in nested frozendicts */
7923+
hash ^= (hash >> 11) ^ (hash >> 25);
7924+
hash = hash * 69069U + 907133923UL;
7925+
7926+
/* -1 is reserved as an error code */
7927+
if (hash == (Py_uhash_t)-1) {
7928+
hash = 590923713UL;
79077929
}
79087930

7909-
FT_ATOMIC_STORE_SSIZE_RELAXED(self->ma_hash, hash);
7910-
return hash;
7931+
FT_ATOMIC_STORE_SSIZE_RELAXED(self->ma_hash, (Py_hash_t)hash);
7932+
return (Py_hash_t)hash;
79117933
}
79127934

79137935

Objects/setobject.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -964,7 +964,10 @@ _shuffle_bits(Py_uhash_t h)
964964
965965
This hash algorithm can be used on either a frozenset or a set.
966966
When it is used on a set, it computes the hash value of the equivalent
967-
frozenset without creating a new frozenset object. */
967+
frozenset without creating a new frozenset object.
968+
969+
If you update this code, update also frozendict_hash() which copied this
970+
code. */
968971

969972
static Py_hash_t
970973
frozenset_hash_impl(PyObject *self)

0 commit comments

Comments
 (0)