From 5373343a117769a6298185d71d18857c56b50822 Mon Sep 17 00:00:00 2001 From: Alper Date: Wed, 18 Feb 2026 01:35:39 -0600 Subject: [PATCH 1/3] Make PyUnstable_Code_SetExtra/GetExtra thread-safe --- Lib/test/test_free_threading/test_code.py | 102 +++++++++++++++++++++ Objects/codeobject.c | 103 ++++++++++++++++------ Python/ceval.c | 20 ++++- 3 files changed, 197 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_free_threading/test_code.py b/Lib/test/test_free_threading/test_code.py index a5136a3ba4edc7..2b508701ff1001 100644 --- a/Lib/test/test_free_threading/test_code.py +++ b/Lib/test/test_free_threading/test_code.py @@ -1,9 +1,39 @@ import unittest +try: + import ctypes +except ImportError: + ctypes = None + from threading import Thread from unittest import TestCase from test.support import threading_helper +from test.support.threading_helper import run_concurrently + +if ctypes is not None: + capi = ctypes.pythonapi + + freefunc = ctypes.CFUNCTYPE(None, ctypes.c_voidp) + + RequestCodeExtraIndex = capi.PyUnstable_Eval_RequestCodeExtraIndex + RequestCodeExtraIndex.argtypes = (freefunc,) + RequestCodeExtraIndex.restype = ctypes.c_ssize_t + + SetExtra = capi.PyUnstable_Code_SetExtra + SetExtra.argtypes = (ctypes.py_object, ctypes.c_ssize_t, ctypes.c_voidp) + SetExtra.restype = ctypes.c_int + + GetExtra = capi.PyUnstable_Code_GetExtra + GetExtra.argtypes = ( + ctypes.py_object, + ctypes.c_ssize_t, + ctypes.POINTER(ctypes.c_voidp), + ) + GetExtra.restype = ctypes.c_int + +NTHREADS = 20 + @threading_helper.requires_working_threading() class TestCode(TestCase): @@ -25,6 +55,78 @@ def run_in_thread(): for thread in threads: thread.join() + @unittest.skipUnless(ctypes, "ctypes is required") + def test_request_code_extra_index_concurrent(self): + """Test concurrent calls to RequestCodeExtraIndex""" + results = [] + + def worker(): + idx = RequestCodeExtraIndex(freefunc(0)) + self.assertGreaterEqual(idx, 0) + results.append(idx) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + + # Every thread must get a unique index. + self.assertEqual(len(results), NTHREADS) + self.assertEqual(len(set(results)), NTHREADS) + + @unittest.skipUnless(ctypes, "ctypes is required") + def test_code_extra_all_ops_concurrent(self): + """Test concurrent RequestCodeExtraIndex + SetExtra + GetExtra""" + LOOP = 100 + + def f(): + pass + + code = f.__code__ + + def worker(): + idx = RequestCodeExtraIndex(freefunc(0)) + self.assertGreaterEqual(idx, 0) + + for i in range(LOOP): + SetExtra(code, idx, ctypes.c_voidp(i + 1)) + + for _ in range(LOOP): + extra = ctypes.c_voidp() + GetExtra(code, idx, extra) + # The slot was set by this thread, so the value must + # be the last one written. + self.assertEqual(extra.value, LOOP) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + + @unittest.skipUnless(ctypes, "ctypes is required") + def test_code_extra_set_get_concurrent(self): + """Test concurrent SetExtra + GetExtra on a shared index""" + LOOP = 100 + + def f(): + pass + + code = f.__code__ + + idx = RequestCodeExtraIndex(freefunc(0)) + self.assertGreaterEqual(idx, 0) + + def worker(): + for i in range(LOOP): + SetExtra(code, idx, ctypes.c_voidp(i + 1)) + + for _ in range(LOOP): + extra = ctypes.c_voidp() + GetExtra(code, idx, extra) + # Value is set by any writer thread. + self.assertTrue(1 <= extra.value <= LOOP) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + + # Every thread's last write is LOOP, so the final value must be LOOP. 
+ extra = ctypes.c_voidp() + GetExtra(code, idx, extra) + self.assertEqual(extra.value, LOOP) + if __name__ == "__main__": unittest.main() diff --git a/Objects/codeobject.c b/Objects/codeobject.c index ed3cc41480ab5c..c1ff14fa89ef37 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1575,6 +1575,12 @@ typedef struct { } _PyCodeObjectExtra; +static inline size_t +code_extra_size(Py_ssize_t n) +{ + return sizeof(_PyCodeObjectExtra) + (n - 1) * sizeof(void *); +} + int PyUnstable_Code_GetExtra(PyObject *code, Py_ssize_t index, void **extra) { @@ -1583,15 +1589,19 @@ PyUnstable_Code_GetExtra(PyObject *code, Py_ssize_t index, void **extra) return -1; } - PyCodeObject *o = (PyCodeObject*) code; - _PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra*) o->co_extra; + PyCodeObject *o = (PyCodeObject *)code; + *extra = NULL; - if (co_extra == NULL || index < 0 || co_extra->ce_size <= index) { - *extra = NULL; + if (index < 0) { return 0; } - *extra = co_extra->ce_extras[index]; + // Lock-free read; pairs with release store in SetExtra. + _PyCodeObjectExtra *co_extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(o->co_extra); + if (co_extra != NULL && index < co_extra->ce_size) { + *extra = co_extra->ce_extras[index]; + } + return 0; } @@ -1601,40 +1611,81 @@ PyUnstable_Code_SetExtra(PyObject *code, Py_ssize_t index, void *extra) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!PyCode_Check(code) || index < 0 || - index >= interp->co_extra_user_count) { + // co_extra_user_count increases monotonically and is published with a + // release store, so once an index is valid it remains valid. + Py_ssize_t user_count = FT_ATOMIC_LOAD_SSIZE_ACQUIRE( + interp->co_extra_user_count); + + if (!PyCode_Check(code) || index < 0 || index >= user_count) { PyErr_BadInternalCall(); return -1; } - PyCodeObject *o = (PyCodeObject*) code; - _PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra *) o->co_extra; + PyCodeObject *o = (PyCodeObject *) code; + int res = 0; - if (co_extra == NULL || co_extra->ce_size <= index) { - Py_ssize_t i = (co_extra == NULL ? 0 : co_extra->ce_size); - co_extra = PyMem_Realloc( - co_extra, - sizeof(_PyCodeObjectExtra) + - (interp->co_extra_user_count-1) * sizeof(void*)); - if (co_extra == NULL) { - return -1; - } - for (; i < interp->co_extra_user_count; i++) { - co_extra->ce_extras[i] = NULL; - } - co_extra->ce_size = interp->co_extra_user_count; - o->co_extra = co_extra; + Py_BEGIN_CRITICAL_SECTION(o); + + _PyCodeObjectExtra *old_extra = (_PyCodeObjectExtra *) o->co_extra; + Py_ssize_t old_size = (old_extra == NULL) ? 0 : old_extra->ce_size; + + // user_count > index is checked above. + Py_ssize_t new_size = old_size > index ? old_size : user_count; + assert(new_size > 0 && new_size > index); + + // Free-threaded builds require copy-on-write to avoid mutating + // co_extra while lock-free readers in GetExtra may be traversing it. + // GIL builds could realloc in place, but SetExtra is called rarely + // and co_extra is small, so use the same path for simplicity. + _PyCodeObjectExtra *co_extra = PyMem_Malloc(code_extra_size(new_size)); + if (co_extra == NULL) { + PyErr_NoMemory(); + res = -1; + goto done; } - if (co_extra->ce_extras[index] != NULL) { + co_extra->ce_size = new_size; + + // Copy existing extras from the old buffer. + if (old_size > 0) { + memcpy(co_extra->ce_extras, old_extra->ce_extras, + old_size * sizeof(void *)); + } + + // NULL-initialize new slots. 
+ for (Py_ssize_t i = old_size; i < new_size; i++) { + co_extra->ce_extras[i] = NULL; + } + + if (old_extra != NULL && index < old_size && + old_extra->ce_extras[index] != NULL) + { + // Free the old extra value if a free function was registered. + // We assume the caller ensures no other thread is concurrently + // using the old value. freefunc free = interp->co_extra_freefuncs[index]; if (free != NULL) { - free(co_extra->ce_extras[index]); + free(old_extra->ce_extras[index]); } } co_extra->ce_extras[index] = extra; - return 0; + + // Publish pointer and slot writes to lock-free readers. + FT_ATOMIC_STORE_PTR_RELEASE(o->co_extra, co_extra); + + if (old_extra != NULL) { +#ifdef Py_GIL_DISABLED + // Defer container free for lock-free readers. + _PyMem_FreeDelayed(old_extra, code_extra_size(old_size)); +#else + PyMem_Free(old_extra); +#endif + } + +done:; + Py_END_CRITICAL_SECTION(); + return res; } diff --git a/Python/ceval.c b/Python/ceval.c index ab2eef560370f5..20772386faf6b4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3493,11 +3493,27 @@ PyUnstable_Eval_RequestCodeExtraIndex(freefunc free) PyInterpreterState *interp = _PyInterpreterState_GET(); Py_ssize_t new_index; - if (interp->co_extra_user_count == MAX_CO_EXTRA_USERS - 1) { +#ifdef Py_GIL_DISABLED + struct _py_code_state *state = &interp->code_state; + FT_MUTEX_LOCK(&state->mutex); +#endif + + if (interp->co_extra_user_count >= MAX_CO_EXTRA_USERS - 1) { +#ifdef Py_GIL_DISABLED + FT_MUTEX_UNLOCK(&state->mutex); +#endif return -1; } - new_index = interp->co_extra_user_count++; + + new_index = interp->co_extra_user_count; interp->co_extra_freefuncs[new_index] = free; + + // Publish freefuncs[new_index] before making the index visible. + FT_ATOMIC_STORE_SSIZE_RELEASE(interp->co_extra_user_count, new_index + 1); + +#ifdef Py_GIL_DISABLED + FT_MUTEX_UNLOCK(&state->mutex); +#endif return new_index; } From e2eef9a7cb675b99d7a9a86c7de8dec88a681484 Mon Sep 17 00:00:00 2001 From: alperyoney Date: Wed, 18 Feb 2026 15:18:02 -0800 Subject: [PATCH 2/3] Add news entry --- .../next/C_API/2026-02-18-15-12-34.gh-issue-144981.4ZdM63.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/C_API/2026-02-18-15-12-34.gh-issue-144981.4ZdM63.rst diff --git a/Misc/NEWS.d/next/C_API/2026-02-18-15-12-34.gh-issue-144981.4ZdM63.rst b/Misc/NEWS.d/next/C_API/2026-02-18-15-12-34.gh-issue-144981.4ZdM63.rst new file mode 100644 index 00000000000000..d86886ab09704a --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2026-02-18-15-12-34.gh-issue-144981.4ZdM63.rst @@ -0,0 +1,3 @@ +Made :c:func:`PyUnstable_Code_SetExtra`, :c:func:`PyUnstable_Code_GetExtra`, +and :c:func:`PyUnstable_Eval_RequestCodeExtraIndex` thread-safe on the +:term:`free threaded ` build. From 3420e150c67ab942d10955f0505fe4e628281c41 Mon Sep 17 00:00:00 2001 From: Alper Date: Thu, 19 Feb 2026 02:12:37 -0600 Subject: [PATCH 3/3] Address comments and refactor SetExtra implementation --- Lib/test/test_free_threading/test_code.py | 17 ++- Objects/codeobject.c | 151 +++++++++++++--------- 2 files changed, 104 insertions(+), 64 deletions(-) diff --git a/Lib/test/test_free_threading/test_code.py b/Lib/test/test_free_threading/test_code.py index 2b508701ff1001..2fc5eea3773c39 100644 --- a/Lib/test/test_free_threading/test_code.py +++ b/Lib/test/test_free_threading/test_code.py @@ -32,6 +32,8 @@ ) GetExtra.restype = ctypes.c_int +# Note: each call to RequestCodeExtraIndex permanently allocates a slot +# (the counter is monotonically increasing), up to MAX_CO_EXTRA_USERS (255). 
NTHREADS = 20 @@ -86,11 +88,13 @@ def worker(): self.assertGreaterEqual(idx, 0) for i in range(LOOP): - SetExtra(code, idx, ctypes.c_voidp(i + 1)) + ret = SetExtra(code, idx, ctypes.c_voidp(i + 1)) + self.assertEqual(ret, 0) for _ in range(LOOP): extra = ctypes.c_voidp() - GetExtra(code, idx, extra) + ret = GetExtra(code, idx, extra) + self.assertEqual(ret, 0) # The slot was set by this thread, so the value must # be the last one written. self.assertEqual(extra.value, LOOP) @@ -112,11 +116,13 @@ def f(): def worker(): for i in range(LOOP): - SetExtra(code, idx, ctypes.c_voidp(i + 1)) + ret = SetExtra(code, idx, ctypes.c_voidp(i + 1)) + self.assertEqual(ret, 0) for _ in range(LOOP): extra = ctypes.c_voidp() - GetExtra(code, idx, extra) + ret = GetExtra(code, idx, extra) + self.assertEqual(ret, 0) # Value is set by any writer thread. self.assertTrue(1 <= extra.value <= LOOP) @@ -124,7 +130,8 @@ def worker(): # Every thread's last write is LOOP, so the final value must be LOOP. extra = ctypes.c_voidp() - GetExtra(code, idx, extra) + ret = GetExtra(code, idx, extra) + self.assertEqual(ret, 0) self.assertEqual(extra.value, LOOP) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index c1ff14fa89ef37..fb600e599ee186 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1581,6 +1581,61 @@ code_extra_size(Py_ssize_t n) return sizeof(_PyCodeObjectExtra) + (n - 1) * sizeof(void *); } +#ifdef Py_GIL_DISABLED +static int +code_extra_grow_ft(PyCodeObject *co, _PyCodeObjectExtra *old_co_extra, + Py_ssize_t old_ce_size, Py_ssize_t new_ce_size, + Py_ssize_t index, void *extra) +{ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(co); + _PyCodeObjectExtra *new_co_extra = PyMem_Malloc( + code_extra_size(new_ce_size)); + if (new_co_extra == NULL) { + PyErr_NoMemory(); + return -1; + } + + if (old_ce_size > 0) { + memcpy(new_co_extra->ce_extras, old_co_extra->ce_extras, + old_ce_size * sizeof(void *)); + } + for (Py_ssize_t i = old_ce_size; i < new_ce_size; i++) { + new_co_extra->ce_extras[i] = NULL; + } + new_co_extra->ce_size = new_ce_size; + new_co_extra->ce_extras[index] = extra; + + // Publish new buffer and its contents to lock-free readers. + FT_ATOMIC_STORE_PTR_RELEASE(co->co_extra, new_co_extra); + if (old_co_extra != NULL) { + // QSBR: defer old-buffer free until lock-free readers quiesce. + _PyMem_FreeDelayed(old_co_extra, code_extra_size(old_ce_size)); + } + return 0; +} +#else +static int +code_extra_grow_gil(PyCodeObject *co, _PyCodeObjectExtra *old_co_extra, + Py_ssize_t old_ce_size, Py_ssize_t new_ce_size, + Py_ssize_t index, void *extra) +{ + _PyCodeObjectExtra *new_co_extra = PyMem_Realloc( + old_co_extra, code_extra_size(new_ce_size)); + if (new_co_extra == NULL) { + PyErr_NoMemory(); + return -1; + } + + for (Py_ssize_t i = old_ce_size; i < new_ce_size; i++) { + new_co_extra->ce_extras[i] = NULL; + } + new_co_extra->ce_size = new_ce_size; + new_co_extra->ce_extras[index] = extra; + co->co_extra = new_co_extra; + return 0; +} +#endif + int PyUnstable_Code_GetExtra(PyObject *code, Py_ssize_t index, void **extra) { @@ -1589,17 +1644,17 @@ PyUnstable_Code_GetExtra(PyObject *code, Py_ssize_t index, void **extra) return -1; } - PyCodeObject *o = (PyCodeObject *)code; + PyCodeObject *co = (PyCodeObject *)code; *extra = NULL; if (index < 0) { return 0; } - // Lock-free read; pairs with release store in SetExtra. - _PyCodeObjectExtra *co_extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(o->co_extra); + // Lock-free read; pairs with release stores in SetExtra. 
+ _PyCodeObjectExtra *co_extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(co->co_extra); if (co_extra != NULL && index < co_extra->ce_size) { - *extra = co_extra->ce_extras[index]; + *extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(co_extra->ce_extras[index]); } return 0; @@ -1611,8 +1666,9 @@ PyUnstable_Code_SetExtra(PyObject *code, Py_ssize_t index, void *extra) { PyInterpreterState *interp = _PyInterpreterState_GET(); - // co_extra_user_count increases monotonically and is published with a - // release store, so once an index is valid it remains valid. + // co_extra_user_count is monotonically increasing and published with + // release store in RequestCodeExtraIndex, so once an index is valid + // it stays valid. Py_ssize_t user_count = FT_ATOMIC_LOAD_SSIZE_ACQUIRE( interp->co_extra_user_count); @@ -1621,71 +1677,48 @@ PyUnstable_Code_SetExtra(PyObject *code, Py_ssize_t index, void *extra) return -1; } - PyCodeObject *o = (PyCodeObject *) code; - int res = 0; - - Py_BEGIN_CRITICAL_SECTION(o); + PyCodeObject *co = (PyCodeObject *)code; + int result = 0; + void *old_slot_value = NULL; - _PyCodeObjectExtra *old_extra = (_PyCodeObjectExtra *) o->co_extra; - Py_ssize_t old_size = (old_extra == NULL) ? 0 : old_extra->ce_size; + Py_BEGIN_CRITICAL_SECTION(co); - // user_count > index is checked above. - Py_ssize_t new_size = old_size > index ? old_size : user_count; - assert(new_size > 0 && new_size > index); + _PyCodeObjectExtra *old_co_extra = (_PyCodeObjectExtra *)co->co_extra; + Py_ssize_t old_ce_size = (old_co_extra == NULL) + ? 0 : old_co_extra->ce_size; - // Free-threaded builds require copy-on-write to avoid mutating - // co_extra while lock-free readers in GetExtra may be traversing it. - // GIL builds could realloc in place, but SetExtra is called rarely - // and co_extra is small, so use the same path for simplicity. - _PyCodeObjectExtra *co_extra = PyMem_Malloc(code_extra_size(new_size)); - if (co_extra == NULL) { - PyErr_NoMemory(); - res = -1; + // Fast path: slot already exists, update in place. + if (index < old_ce_size) { + old_slot_value = old_co_extra->ce_extras[index]; + FT_ATOMIC_STORE_PTR_RELEASE(old_co_extra->ce_extras[index], extra); goto done; } - co_extra->ce_size = new_size; - - // Copy existing extras from the old buffer. - if (old_size > 0) { - memcpy(co_extra->ce_extras, old_extra->ce_extras, - old_size * sizeof(void *)); - } - - // NULL-initialize new slots. - for (Py_ssize_t i = old_size; i < new_size; i++) { - co_extra->ce_extras[i] = NULL; - } - - if (old_extra != NULL && index < old_size && - old_extra->ce_extras[index] != NULL) - { - // Free the old extra value if a free function was registered. - // We assume the caller ensures no other thread is concurrently - // using the old value. - freefunc free = interp->co_extra_freefuncs[index]; - if (free != NULL) { - free(old_extra->ce_extras[index]); - } - } - - co_extra->ce_extras[index] = extra; - - // Publish pointer and slot writes to lock-free readers. - FT_ATOMIC_STORE_PTR_RELEASE(o->co_extra, co_extra); - - if (old_extra != NULL) { + // Slow path: buffer needs to grow. + Py_ssize_t new_ce_size = user_count; #ifdef Py_GIL_DISABLED - // Defer container free for lock-free readers. - _PyMem_FreeDelayed(old_extra, code_extra_size(old_size)); + // FT build: allocate new buffer and swap; QSBR reclaims the old one. + result = code_extra_grow_ft( + co, old_co_extra, old_ce_size, new_ce_size, index, extra); #else - PyMem_Free(old_extra); + // GIL build: grow with realloc. 
+ result = code_extra_grow_gil( + co, old_co_extra, old_ce_size, new_ce_size, index, extra); #endif - } done:; Py_END_CRITICAL_SECTION(); - return res; + if (old_slot_value != NULL) { + // Free the old slot value if a free function was registered. + // The caller must ensure no other thread can still access the old + // value after this overwrite. + freefunc free_extra = interp->co_extra_freefuncs[index]; + if (free_extra != NULL) { + free_extra(old_slot_value); + } + } + + return result; }
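
For reviewers who do not work with the FT_ATOMIC_* wrappers often, here is a standalone sketch of the copy-on-write publication pattern that the SetExtra slow path relies on. This is not CPython code: the names and the C11 <stdatomic.h> spelling are illustrative, and it elides both the per-slot atomics used by the fast path and the QSBR-deferred free (_PyMem_FreeDelayed) of the old buffer.

    /* Simplified model of SetExtra's grow-and-publish step and GetExtra's
     * lock-free read.  'current' plays the role of co->co_extra. */
    #include <stdatomic.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct {
        size_t size;
        void *slots[];                 /* like ce_extras */
    } extras_t;

    static _Atomic(extras_t *) current;

    /* Writer: runs with the per-object lock held (Py_BEGIN_CRITICAL_SECTION
     * in the patch), so there is at most one writer at a time. */
    static int
    grow_and_set(size_t new_size, size_t index, void *value)
    {
        extras_t *old = atomic_load_explicit(&current, memory_order_relaxed);
        size_t old_size = (old == NULL) ? 0 : old->size;

        extras_t *grown = calloc(1, sizeof(extras_t) + new_size * sizeof(void *));
        if (grown == NULL) {
            return -1;
        }
        if (old_size > 0) {
            memcpy(grown->slots, old->slots, old_size * sizeof(void *));
        }
        grown->size = new_size;
        grown->slots[index] = value;

        /* Release store: every write to *grown above happens-before any
         * reader that observes the new pointer via the acquire load. */
        atomic_store_explicit(&current, grown, memory_order_release);

        /* The old buffer must outlive concurrent readers; the patch defers
         * the free with QSBR instead of calling free(old) here. */
        return 0;
    }

    /* Reader: lock-free, as in PyUnstable_Code_GetExtra. */
    static void *
    get(size_t index)
    {
        extras_t *buf = atomic_load_explicit(&current, memory_order_acquire);
        if (buf == NULL || index >= buf->size) {
            return NULL;
        }
        return buf->slots[index];
    }

On the default (GIL) build the FT_ATOMIC_* macros collapse to plain loads and stores, which is why the realloc-based code_extra_grow_gil path does not need the copy.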
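
For context on what this unstable API is used for, a minimal, hypothetical consumer is sketched below. Only the three PyUnstable_* calls and the freefunc typedef are the real API; the attach_cache helper, the module-level cache_index, and the 64-byte cache blob are made up for illustration.

    #include <Python.h>

    static Py_ssize_t cache_index = -1;

    /* Called by the interpreter when the code object is deallocated, or by
     * SetExtra when a non-NULL value in the slot is overwritten. */
    static void
    cache_free(void *ptr)
    {
        PyMem_Free(ptr);
    }

    static int
    attach_cache(PyObject *code)
    {
        if (cache_index < 0) {
            /* A real extension would request its slot once, at module init;
             * indices are never released. */
            cache_index = PyUnstable_Eval_RequestCodeExtraIndex(cache_free);
            if (cache_index < 0) {
                PyErr_SetString(PyExc_RuntimeError, "no co_extra slots left");
                return -1;
            }
        }

        void *existing = NULL;
        if (PyUnstable_Code_GetExtra(code, cache_index, &existing) < 0) {
            return -1;
        }
        if (existing != NULL) {
            return 0;                  /* already attached */
        }

        void *cache = PyMem_Calloc(1, 64);   /* stand-in for per-code data */
        if (cache == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        if (PyUnstable_Code_SetExtra(code, cache_index, cache) < 0) {
            PyMem_Free(cache);
            return -1;
        }
        return 0;
    }

With this change, GetExtra stays lock-free and SetExtra serializes writers on the code object's lock, so a pattern like the above is safe to call from multiple threads on the free-threaded build (subject to the usual last-writer-wins race if two threads attach concurrently, which the new tests exercise).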