-
-
Notifications
You must be signed in to change notification settings - Fork 34k
gh-144356: Avoid races when computing set_iterator.__length_hint__ under no-gil
#144357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
47d75fe
3e3785c
229ced3
cdcf88a
21f1478
a18c698
79b5fbc
6ac15e0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Fix a data race in ``set_iterator.__length_hint__`` under ``Py_GIL_DISABLED``. |
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1056,8 +1056,23 @@ setiter_len(PyObject *op, PyObject *Py_UNUSED(ignored)) | |||||||||||||
| { | ||||||||||||||
| setiterobject *si = (setiterobject*)op; | ||||||||||||||
| Py_ssize_t len = 0; | ||||||||||||||
| if (si->si_set != NULL && si->si_used == si->si_set->used) | ||||||||||||||
| #ifdef Py_GIL_DISABLED | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might work for For several other iterators the approach is to keep the reference. Note: I tried creating a minimal example where concurrent iteration fails, but I have not succeeded yet (the example does not crash, although I have not run ThreadSanitizer on it yet). Test for concurrent iteration on set iterator
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you. I think your points make a lot of sense, and I really appreciate the two links you shared—they helped me get a more complete picture of the iterator-related data race.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, we should fix this like we have fixed the others and, as Sam suggested, only clear the associated set in non-free-threading builds. The current code is incorrect because it uses try-incref, which can fail spuriously if the set object is not marked to enable try-incref. |
||||||||||||||
| PySetObject *so = si->si_set; | ||||||||||||||
| if (so != NULL) { | ||||||||||||||
| Py_BEGIN_CRITICAL_SECTION(so); | ||||||||||||||
| Py_ssize_t pos = FT_ATOMIC_LOAD_SSIZE_RELAXED(si->si_pos); | ||||||||||||||
| if (pos >= 0 && | ||||||||||||||
| si->si_used == FT_ATOMIC_LOAD_SSIZE_RELAXED(so->used)) | ||||||||||||||
| { | ||||||||||||||
| len = si->len; | ||||||||||||||
| } | ||||||||||||||
| Py_END_CRITICAL_SECTION(); | ||||||||||||||
| } | ||||||||||||||
| #else | ||||||||||||||
| if (si->si_set != NULL && si->si_used == si->si_set->used) { | ||||||||||||||
| len = si->len; | ||||||||||||||
| } | ||||||||||||||
| #endif | ||||||||||||||
| return PyLong_FromSsize_t(len); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
|
|
@@ -1096,6 +1111,7 @@ static PyObject *setiter_iternext(PyObject *self) | |||||||||||||
| Py_ssize_t i, mask; | ||||||||||||||
| setentry *entry; | ||||||||||||||
| PySetObject *so = si->si_set; | ||||||||||||||
| int exhausted = 0; | ||||||||||||||
|
|
||||||||||||||
| if (so == NULL) | ||||||||||||||
| return NULL; | ||||||||||||||
|
|
@@ -1111,24 +1127,59 @@ static PyObject *setiter_iternext(PyObject *self) | |||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| Py_BEGIN_CRITICAL_SECTION(so); | ||||||||||||||
| #ifdef Py_GIL_DISABLED | ||||||||||||||
| /* si_pos may be read outside the lock; keep it atomic in FT builds */ | ||||||||||||||
| i = FT_ATOMIC_LOAD_SSIZE_RELAXED(si->si_pos); | ||||||||||||||
| if (i < 0) { | ||||||||||||||
| /* iterator already exhausted */ | ||||||||||||||
| goto done; | ||||||||||||||
| } | ||||||||||||||
| #else | ||||||||||||||
| i = si->si_pos; | ||||||||||||||
| assert(i>=0); | ||||||||||||||
| entry = so->table; | ||||||||||||||
| mask = so->mask; | ||||||||||||||
| while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy)) { | ||||||||||||||
| i++; | ||||||||||||||
| if (i < 0) { | ||||||||||||||
| /* iterator already exhausted */ | ||||||||||||||
| exhausted = 1; | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You cannot directly return as it would skip ending the critical section |
||||||||||||||
| } | ||||||||||||||
| if (i <= mask) { | ||||||||||||||
| key = Py_NewRef(entry[i].key); | ||||||||||||||
| #endif | ||||||||||||||
|
|
||||||||||||||
| if (!exhausted) { | ||||||||||||||
| assert(i >= 0); | ||||||||||||||
| entry = so->table; | ||||||||||||||
| mask = so->mask; | ||||||||||||||
| while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy)) { | ||||||||||||||
| i++; | ||||||||||||||
| } | ||||||||||||||
| if (i <= mask) { | ||||||||||||||
| key = Py_NewRef(entry[i].key); | ||||||||||||||
| #ifdef Py_GIL_DISABLED | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should follow the pattern that we use in other iterators: don't clear That will keep other things simpler. |
||||||||||||||
| FT_ATOMIC_STORE_SSIZE_RELAXED(si->si_pos, i + 1); | ||||||||||||||
| #else | ||||||||||||||
| si->si_pos = i + 1; | ||||||||||||||
| #endif | ||||||||||||||
|
Comment on lines
+1154
to
+1158
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
On the normal build the macro will expand to |
||||||||||||||
| si->len--; | ||||||||||||||
| } | ||||||||||||||
| else { | ||||||||||||||
| #ifdef Py_GIL_DISABLED | ||||||||||||||
| /* free-threaded: keep si_set; just mark exhausted */ | ||||||||||||||
| FT_ATOMIC_STORE_SSIZE_RELAXED(si->si_pos, -1); | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The value -1 written here could be overwritten by a concurrent thread (at line 1155), which means that after the set iterator is exhausted it can be brought back to life. This does not lead to overflows or other issues (as far as I can see), but it is somewhat odd behaviour. |
||||||||||||||
| si->len = 0; | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This (and some other places) should also be atomic? |
||||||||||||||
| #else | ||||||||||||||
| si->si_set = NULL; | ||||||||||||||
| #endif | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| #ifdef Py_GIL_DISABLED | ||||||||||||||
| done: | ||||||||||||||
| #endif | ||||||||||||||
| Py_END_CRITICAL_SECTION(); | ||||||||||||||
| si->si_pos = i+1; | ||||||||||||||
|
|
||||||||||||||
| if (key == NULL) { | ||||||||||||||
| si->si_set = NULL; | ||||||||||||||
| #ifndef Py_GIL_DISABLED | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think in the normal build you still have to do |
||||||||||||||
| Py_DECREF(so); | ||||||||||||||
| #endif | ||||||||||||||
| return NULL; | ||||||||||||||
| } | ||||||||||||||
| si->len--; | ||||||||||||||
| return key; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use the same style for starting and stopping threads as the other tests in this file (e.g. test_contains_hash_mutate)