diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index cace780f79f515..9f853397af4d95 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -276,6 +276,90 @@ def test_parse_again(self): self.assertEqual(expat.ErrorString(cm.exception.code), expat.errors.XML_ERROR_FINISHED) + @support.subTests("encoding", ("utf-8", "utf-16")) + def test_parse_reentrancy_with_encoding(self, encoding): + # See https://github.com/python/cpython/issues/146169. + parser = expat.ParserCreate(encoding=encoding) + + def CharacterDataHandler(data): + return parser.Parse(data, False) + CharacterDataHandler = mock.Mock(wraps=CharacterDataHandler) + parser.CharacterDataHandler = CharacterDataHandler + + payload = "x".encode(encoding) + msg = re.escape("cannot call Parse() from within a handler") + with self.assertRaisesRegex(RuntimeError, msg): + for i in range(len(payload)): + parser.Parse(payload[i:i+1], i == len(payload) - 1) + CharacterDataHandler.assert_called_once_with("x") + + @support.subTests("encoding", ("utf-8", "utf-16")) + def test_parse_file_reentrancy_with_encoding(self, encoding): + # See https://github.com/python/cpython/issues/146169. 
+ parser = expat.ParserCreate(encoding=encoding) + + def CharacterDataHandler(data): + return parser.ParseFile(BytesIO(data.encode(encoding))) + CharacterDataHandler = mock.Mock(wraps=CharacterDataHandler) + parser.CharacterDataHandler = CharacterDataHandler + + payload = "x".encode(encoding) + payload_buffer = BytesIO(payload) + msg = re.escape("cannot call ParseFile() from within a handler") + with self.assertRaisesRegex(RuntimeError, msg): + parser.ParseFile(payload_buffer) + CharacterDataHandler.assert_called_once_with("x") + + @support.subTests("encoding", ("utf-8", "utf-16")) + def test_parse_reentrancy_allowed_for_external_parser(self, encoding): + parser = expat.ParserCreate(encoding=encoding) + subparser = parser.ExternalEntityParserCreate(None, encoding) + payload_extstr = '' + + def ExternalEntityRefHandler(*args): + subparser.Parse(payload_extstr, True) + # return a nonzero integer to indicate that parsing continues + return 1 + ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler) + parser.ExternalEntityRefHandler = ExternalEntityRefHandler + + payload = textwrap.dedent(f"""\ + + + &ext; + """).encode(encoding) + + # Check that external parsers can be called from parent's handlers.
+ for i in range(len(payload)): + parser.Parse(payload[i:i+1], i == len(payload) - 1) + external_ref_args = ('ext', None, 'entity.file', None) + ExternalEntityRefHandler.assert_called_once_with(*external_ref_args) + + @support.subTests("encoding", ("utf-8", "utf-16")) + def test_parse_file_reentrancy_allowed_for_external_parser(self, encoding): + parser = expat.ParserCreate(encoding=encoding) + subparser = parser.ExternalEntityParserCreate(None, encoding) + payload_extstr = '' + + def ExternalEntityRefHandler(*args): + subparser.ParseFile(BytesIO(payload_extstr.encode(encoding))) + # return a nonzero integer to indicate that parsing continues + return 1 + ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler) + parser.ExternalEntityRefHandler = ExternalEntityRefHandler + + payload = textwrap.dedent(f"""\ + + + &ext; + """).encode(encoding) + + # Check that external parsers can be called from parent's handlers. + parser.ParseFile(BytesIO(payload)) + external_ref_args = ('ext', None, 'entity.file', None) + ExternalEntityRefHandler.assert_called_once_with(*external_ref_args) + + class NamespaceSeparatorTest(unittest.TestCase): def test_legal(self): # Tests that make sure we get errors when the namespace_separator value diff --git a/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst b/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst new file mode 100644 index 00000000000000..d9460540a5a7d3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst @@ -0,0 +1,4 @@ +:mod:`xml.parsers.expat`: raise :exc:`RuntimeError` when an Expat handler +calls :meth:`parser.Parse <xml.parsers.expat.xmlparser.Parse>` or +:meth:`parser.ParseFile <xml.parsers.expat.xmlparser.ParseFile>` on the parser +that called the handler. Patch by Bénédikt Tran.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 31b883fe8bd548..5ff6476d81253f 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -780,6 +780,18 @@ VOID_HANDLER(StartDoctypeDecl, VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()")) +/* check that the current function is not called from within a handler */ +#define CHECK_NOT_IN_HANDLER(PARSER, FUNCNAME) \ + do { \ + if (PARSER->in_callback) { \ + PyErr_SetString(PyExc_RuntimeError, \ + "cannot call " FUNCNAME "() " \ + "from within a handler"); \ + return NULL; \ + } \ + } while (0) + + /* ---------------------------------------------------------------- */ /*[clinic input] class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype" @@ -857,6 +869,9 @@ pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls, PyObject *data, int isfinal) /*[clinic end generated code: output=8faffe07fe1f862a input=053e0f047e55c05a]*/ { + // avoid re-entrant calls to XML_Parse() + CHECK_NOT_IN_HANDLER(self, "Parse"); + const char *s; Py_ssize_t slen; Py_buffer view; @@ -956,6 +971,9 @@ pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls, PyObject *file) /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/ { + // avoid re-entrant calls to XML_GetBuffer() or XML_ParseBuffer() + CHECK_NOT_IN_HANDLER(self, "ParseFile"); + int rv = 1; PyObject *readmethod = NULL;