From 4b45e07d679099fdc660c35e8d2b68950689e65a Mon Sep 17 00:00:00 2001 From: raminfp Date: Thu, 19 Feb 2026 13:24:37 +0330 Subject: [PATCH 1/3] gh-144984: Fix crash in ExternalEntityParserCreate() error paths When ExternalEntityParserCreate() hits an error path (allocation failure), Py_DECREF(new_parser) triggers xmlparse_dealloc() on a partially-initialized object: 1. handlers is NULL, so clear_handlers dereferences NULL (SEGV). 2. Py_CLEAR(parent) in dealloc already decrements the parent's refcount, so the explicit Py_DECREF(self) is a double-decrement. Fix by adding a NULL guard in clear_handlers and setting parent to NULL before Py_DECREF(new_parser) in each error path so that dealloc does not over-decrement the parent's refcount. --- Lib/test/test_pyexpat.py | 29 +++++++++++++++++++ ...19-12-00-00.gh-issue-144984.b93995c982.rst | 3 ++ Modules/pyexpat.c | 6 ++++ 3 files changed, 38 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-02-19-12-00-00.gh-issue-144984.b93995c982.rst diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 74a75458289b4d..bb496081ef634d 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -824,6 +824,35 @@ def test_parent_parser_outlives_its_subparsers__chain(self): del subparser +class ExternalEntityParserCreateErrorTest(unittest.TestCase): + """ExternalEntityParserCreate error paths should not crash or leak + refcounts on the parent parser. + + See https://github.com/python/cpython/issues/144984. + """ + + def test_error_path_no_crash(self): + # When an allocation inside ExternalEntityParserCreate fails, + # the partially-initialized subparser is deallocated. This + # must not dereference NULL handlers or double-decrement the + # parent parser's refcount. + _testcapi = import_helper.import_module('_testcapi') + parser = expat.ParserCreate() + parser.buffer_text = True + rc_before = sys.getrefcount(parser) + + _testcapi.set_nomemory(1, 10) + try: + parser.ExternalEntityParserCreate(None) + except MemoryError: + pass + finally: + _testcapi.remove_mem_hooks() + + rc_after = sys.getrefcount(parser) + self.assertEqual(rc_after, rc_before) + + class ReparseDeferralTest(unittest.TestCase): def test_getter_setter_round_trip(self): parser = expat.ParserCreate() diff --git a/Misc/NEWS.d/next/Library/2026-02-19-12-00-00.gh-issue-144984.b93995c982.rst b/Misc/NEWS.d/next/Library/2026-02-19-12-00-00.gh-issue-144984.b93995c982.rst new file mode 100644 index 00000000000000..66e07dc3098c5f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-19-12-00-00.gh-issue-144984.b93995c982.rst @@ -0,0 +1,3 @@ +Fix crash in :meth:`xml.parsers.expat.xmlparser.ExternalEntityParserCreate` +when an allocation fails. The error paths could dereference NULL ``handlers`` +and double-decrement the parent parser's reference count. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index e9255038eee5b5..20c629c076d364 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1097,12 +1097,14 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, if (self->buffer != NULL) { new_parser->buffer = PyMem_Malloc(new_parser->buffer_size); if (new_parser->buffer == NULL) { + new_parser->parent = NULL; Py_DECREF(new_parser); Py_DECREF(self); return PyErr_NoMemory(); } } if (!new_parser->itself) { + new_parser->parent = NULL; Py_DECREF(new_parser); Py_DECREF(self); return PyErr_NoMemory(); @@ -1117,6 +1119,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->handlers = PyMem_New(PyObject *, i); if (!new_parser->handlers) { + new_parser->parent = NULL; Py_DECREF(new_parser); Py_DECREF(self); return PyErr_NoMemory(); @@ -2489,6 +2492,9 @@ PyInit_pyexpat(void) static void clear_handlers(xmlparseobject *self, int initial) { + if (self->handlers == NULL) { + return; + } for (size_t i = 0; handler_info[i].name != NULL; i++) { if (initial) { self->handlers[i] = NULL; From be72e78607ec0f29b16bb1432c9419aa9be7ec2d Mon Sep 17 00:00:00 2001 From: raminfp Date: Fri, 20 Feb 2026 00:16:06 +0330 Subject: [PATCH 2/3] gh-144984: Address review: use Py_NewRef(self) for parent, add setUpClass --- Lib/test/test_pyexpat.py | 13 +++++++++---- Modules/pyexpat.c | 16 ++++------------ 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index bb496081ef634d..f7478bd79781e6 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -831,23 +831,28 @@ class ExternalEntityParserCreateErrorTest(unittest.TestCase): See https://github.com/python/cpython/issues/144984. """ + @classmethod + def setUpClass(cls): + cls._testcapi = import_helper.import_module('_testcapi') + def test_error_path_no_crash(self): # When an allocation inside ExternalEntityParserCreate fails, # the partially-initialized subparser is deallocated. This # must not dereference NULL handlers or double-decrement the # parent parser's refcount. - _testcapi = import_helper.import_module('_testcapi') parser = expat.ParserCreate() parser.buffer_text = True rc_before = sys.getrefcount(parser) - _testcapi.set_nomemory(1, 10) + self._testcapi.set_nomemory(1, 10) + raised = False try: parser.ExternalEntityParserCreate(None) except MemoryError: - pass + raised = True finally: - _testcapi.remove_mem_hooks() + self._testcapi.remove_mem_hooks() + self.assertTrue(raised, "MemoryError not raised") rc_after = sys.getrefcount(parser) self.assertEqual(rc_after, rc_before) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 20c629c076d364..a1fa51514afc85 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1076,11 +1076,6 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, return NULL; } - // The new subparser will make use of the parent XML_Parser inside of Expat. - // So we need to take subparsers into account with the reference counting - // of their parent parser. - Py_INCREF(self); - new_parser->buffer_size = self->buffer_size; new_parser->buffer_used = 0; new_parser->buffer = NULL; @@ -1090,23 +1085,22 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->ns_prefixes = self->ns_prefixes; new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, encoding); - new_parser->parent = (PyObject *)self; + // The new subparser will make use of the parent XML_Parser inside of Expat. + // So we need to take subparsers into account with the reference counting + // of their parent parser. + new_parser->parent = Py_NewRef(self); new_parser->handlers = 0; new_parser->intern = Py_XNewRef(self->intern); if (self->buffer != NULL) { new_parser->buffer = PyMem_Malloc(new_parser->buffer_size); if (new_parser->buffer == NULL) { - new_parser->parent = NULL; Py_DECREF(new_parser); - Py_DECREF(self); return PyErr_NoMemory(); } } if (!new_parser->itself) { - new_parser->parent = NULL; Py_DECREF(new_parser); - Py_DECREF(self); return PyErr_NoMemory(); } @@ -1119,9 +1113,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->handlers = PyMem_New(PyObject *, i); if (!new_parser->handlers) { - new_parser->parent = NULL; Py_DECREF(new_parser); - Py_DECREF(self); return PyErr_NoMemory(); } clear_handlers(new_parser, 1); From f3c1732ec1b0130989e04e314794403c9daaaa33 Mon Sep 17 00:00:00 2001 From: raminfp Date: Fri, 20 Feb 2026 01:51:17 +0330 Subject: [PATCH 3/3] gh-144984: Address review: rename to cls.testcapi, add comment for assertRaises --- Lib/test/test_pyexpat.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index f7478bd79781e6..6d7099750c13cb 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -833,7 +833,7 @@ class ExternalEntityParserCreateErrorTest(unittest.TestCase): @classmethod def setUpClass(cls): - cls._testcapi = import_helper.import_module('_testcapi') + cls.testcapi = import_helper.import_module('_testcapi') def test_error_path_no_crash(self): # When an allocation inside ExternalEntityParserCreate fails, @@ -844,14 +844,17 @@ def test_error_path_no_crash(self): parser.buffer_text = True rc_before = sys.getrefcount(parser) - self._testcapi.set_nomemory(1, 10) + # We avoid self.assertRaises(MemoryError) here because the + # context manager itself needs memory allocations that fail + # while the nomemory hook is active. + self.testcapi.set_nomemory(1, 10) raised = False try: parser.ExternalEntityParserCreate(None) except MemoryError: raised = True finally: - self._testcapi.remove_mem_hooks() + self.testcapi.remove_mem_hooks() self.assertTrue(raised, "MemoryError not raised") rc_after = sys.getrefcount(parser)