From 554d5c156ab6a665977e7bdd992f81cc47a1d72e Mon Sep 17 00:00:00 2001 From: "aleksandr.tseluyko" Date: Mon, 6 Apr 2026 11:32:52 +0300 Subject: [PATCH 1/2] gh-142659: Optimize set_swap_bodies for intersection_update Replace the general-purpose set_swap_bodies() with a specialized set_replace_body() that exploits the invariant that src is always a uniquely-referenced temporary about to be discarded. --- Objects/setobject.c | 102 +++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index 1e630563604552..394df3c69af0cd 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1475,74 +1475,68 @@ copy_small_table(setentry *dest, setentry *src) } #endif -/* set_swap_bodies() switches the contents of any two sets by moving their - internal data pointers and, if needed, copying the internal smalltables. - Semantically equivalent to: +/* set_replace_body() replaces the contents of dst with those of src, + moving dst's old contents into src for proper cleanup on Py_DECREF. - t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t + The caller guarantees that src is a uniquely-referenced temporary set + that will be discarded immediately afterward. This lets us use plain + stores on src, skip propagating the shared mark from src to dst, and skip + the frozenset hash swap (neither argument is ever a frozenset here). The function always succeeds and it leaves both objects in a stable state. - Useful for operations that update in-place (by allowing an intermediate - result to be swapped into one of the original inputs). 
*/ static void -set_swap_bodies(PySetObject *a, PySetObject *b) +set_replace_body(PySetObject *dst, PySetObject *src) { Py_ssize_t t; setentry *u; setentry tab[PySet_MINSIZE]; - Py_hash_t h; - - setentry *a_table = a->table; - setentry *b_table = b->table; - FT_ATOMIC_STORE_PTR_RELEASE(a->table, NULL); - FT_ATOMIC_STORE_PTR_RELEASE(b->table, NULL); - - t = a->fill; a->fill = b->fill; b->fill = t; - t = a->used; - FT_ATOMIC_STORE_SSIZE_RELAXED(a->used, b->used); - FT_ATOMIC_STORE_SSIZE_RELAXED(b->used, t); - t = a->mask; - FT_ATOMIC_STORE_SSIZE_RELEASE(a->mask, b->mask); - FT_ATOMIC_STORE_SSIZE_RELEASE(b->mask, t); - - u = a_table; - if (a_table == a->smalltable) - u = b->smalltable; - a_table = b_table; - if (b_table == b->smalltable) - a_table = a->smalltable; - b_table = u; - - if (a_table == a->smalltable || b_table == b->smalltable) { - memcpy(tab, a->smalltable, sizeof(tab)); + + assert(!PyType_IsSubtype(Py_TYPE(dst), &PyFrozenSet_Type)); + assert(!PyType_IsSubtype(Py_TYPE(src), &PyFrozenSet_Type)); + assert(Py_REFCNT(src) == 1); + + setentry *dst_table = dst->table; + setentry *src_table = src->table; + FT_ATOMIC_STORE_PTR_RELEASE(dst->table, NULL); + src->table = NULL; + + t = dst->fill; dst->fill = src->fill; src->fill = t; + t = dst->used; + FT_ATOMIC_STORE_SSIZE_RELAXED(dst->used, src->used); + src->used = t; + t = dst->mask; + FT_ATOMIC_STORE_SSIZE_RELEASE(dst->mask, src->mask); + src->mask = t; + + u = dst_table; + if (dst_table == dst->smalltable) + u = src->smalltable; + dst_table = src_table; + if (src_table == src->smalltable) + dst_table = dst->smalltable; + src_table = u; + + if (dst_table == dst->smalltable || src_table == src->smalltable) { + memcpy(tab, dst->smalltable, sizeof(tab)); #ifndef Py_GIL_DISABLED - memcpy(a->smalltable, b->smalltable, sizeof(tab)); - memcpy(b->smalltable, tab, sizeof(tab)); + memcpy(dst->smalltable, src->smalltable, sizeof(tab)); + memcpy(src->smalltable, tab, sizeof(tab)); #else - copy_small_table(a->smalltable, 
b->smalltable); - copy_small_table(b->smalltable, tab); + copy_small_table(dst->smalltable, src->smalltable); + memcpy(src->smalltable, tab, sizeof(tab)); #endif } - if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) && - PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) { - h = FT_ATOMIC_LOAD_SSIZE_RELAXED(a->hash); - FT_ATOMIC_STORE_SSIZE_RELAXED(a->hash, FT_ATOMIC_LOAD_SSIZE_RELAXED(b->hash)); - FT_ATOMIC_STORE_SSIZE_RELAXED(b->hash, h); - } else { - FT_ATOMIC_STORE_SSIZE_RELAXED(a->hash, -1); - FT_ATOMIC_STORE_SSIZE_RELAXED(b->hash, -1); - } - if (!SET_IS_SHARED(b) && SET_IS_SHARED(a)) { - SET_MARK_SHARED(b); - } - if (!SET_IS_SHARED(a) && SET_IS_SHARED(b)) { - SET_MARK_SHARED(a); + FT_ATOMIC_STORE_SSIZE_RELAXED(dst->hash, -1); + + if (SET_IS_SHARED(dst)) { + SET_MARK_SHARED(src); } - FT_ATOMIC_STORE_PTR_RELEASE(a->table, a_table); - FT_ATOMIC_STORE_PTR_RELEASE(b->table, b_table); + + FT_ATOMIC_STORE_PTR_RELEASE(dst->table, dst_table); + src->table = src_table; } /*[clinic input] @@ -1797,7 +1791,7 @@ set_intersection_update(PySetObject *so, PyObject *other) tmp = set_intersection(so, other); if (tmp == NULL) return NULL; - set_swap_bodies(so, (PySetObject *)tmp); + set_replace_body(so, (PySetObject *)tmp); Py_DECREF(tmp); Py_RETURN_NONE; } @@ -1821,7 +1815,7 @@ set_intersection_update_multi_impl(PySetObject *so, PyObject * const *others, if (tmp == NULL) return NULL; Py_BEGIN_CRITICAL_SECTION(so); - set_swap_bodies(so, (PySetObject *)tmp); + set_replace_body(so, (PySetObject *)tmp); Py_END_CRITICAL_SECTION(); Py_DECREF(tmp); Py_RETURN_NONE; From 37a95e6d163846eef69222597c9c86e63e70b445 Mon Sep 17 00:00:00 2001 From: "aleksandr.tseluyko" Date: Mon, 6 Apr 2026 11:45:04 +0300 Subject: [PATCH 2/2] Add news entry for gh-142659 --- .../2026-04-06-11-32-52.gh-issue-142659.44riUp.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-06-11-32-52.gh-issue-142659.44riUp.rst diff --git 
a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-06-11-32-52.gh-issue-142659.44riUp.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-06-11-32-52.gh-issue-142659.44riUp.rst new file mode 100644 index 00000000000000..a8fe979dfc425a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-06-11-32-52.gh-issue-142659.44riUp.rst @@ -0,0 +1,3 @@ +Optimize :meth:`set.intersection_update` by replacing the general-purpose +``set_swap_bodies()`` with a specialized ``set_replace_body()`` that skips +atomic operations and checks that the temporary set argument does not need.