From c7cbb61a68213a80d5e76ae0ae66a89daa3aa7cf Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 8 Oct 2025 15:36:21 +0300 Subject: [PATCH 1/6] gh-139489: Add xml.sax.utils.is_valid_name() It allows to check whether a string can be used as an element or attribute name in XML. --- Doc/library/xml.sax.utils.rst | 14 +++++++++ Doc/whatsnew/3.15.rst | 8 +++++ Lib/test/test_sax.py | 22 ++++++++++++-- Lib/xml/sax/saxutils.py | 30 +++++++++++++++++-- ...-10-08-15-36-00.gh-issue-139489.W46tvn.rst | 2 ++ 5 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst diff --git a/Doc/library/xml.sax.utils.rst b/Doc/library/xml.sax.utils.rst index 5ee11d58c3dd26..7d950477443390 100644 --- a/Doc/library/xml.sax.utils.rst +++ b/Doc/library/xml.sax.utils.rst @@ -59,6 +59,20 @@ or as base classes. using the reference concrete syntax. +.. function:: is_valid_name(name) + + Return ``True`` if the string is a valid element or attribute name, + ``False`` otherwise. + + Almost all characters are permitted in names, except control characters and + those which either are or reasonably could be used as delimiters. + Characters like ":", "-", ".", "_", and "·" are permitted, but "<", "/", + "!", "?", and "=" are forbidden. + The name cannot start with a digit or a character like "-", ".", and "·". + + .. versionadded:: next + + .. class:: XMLGenerator(out=None, encoding='iso-8859-1', short_empty_elements=False) This class implements the :class:`~xml.sax.handler.ContentHandler` interface diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 4b176d6c8e6034..c907b3515ab015 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -580,6 +580,14 @@ xml.parsers.expat .. 
_billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack +xml.sax.utils +------------- + +* Add the :func:`~xml.sax.utils.is_valid_name` function, which allows to check + whether a string can be used as an element or attribute name in XML. + (Contributed by Serhiy Storchaka in :gh:`139489`.) + + zlib ---- diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 5c10bcedc69bc6..4f0824d3eea068 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -9,8 +9,9 @@ except SAXReaderNotAvailable: # don't try to test this module if we cannot create a parser raise unittest.SkipTest("no XML parsers available") -from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ - XMLFilterBase, prepare_input_source +from xml.sax.saxutils import (XMLGenerator, escape, unescape, quoteattr, + is_valid_name, + XMLFilterBase, prepare_input_source) from xml.sax.expatreader import create_parser from xml.sax.handler import (feature_namespaces, feature_external_ges, LexicalHandler) @@ -343,6 +344,23 @@ def test_single_double_quoteattr(self): self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"), "\"Includes 'single' and &quot;double&quot; quotes\"") + def test_is_valid_name(self): + self.assertFalse(is_valid_name('')) + self.assertTrue(is_valid_name('name')) + self.assertTrue(is_valid_name('NAME')) + self.assertTrue(is_valid_name('name0:-._·')) + self.assertTrue(is_valid_name('_')) + self.assertTrue(is_valid_name(':')) + self.assertTrue(is_valid_name('Ñàḿĕ')) + self.assertTrue(is_valid_name('\U000EFFFF')) + self.assertFalse(is_valid_name('0')) + self.assertFalse(is_valid_name('-')) + self.assertFalse(is_valid_name('.')) + self.assertFalse(is_valid_name('·')) + self.assertFalse(is_valid_name('na me')) + for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000': + self.assertFalse(is_valid_name('name' + c)) + # ===== make_parser def test_make_parser(self): # Creating a parser should succeed - it should fall back diff --git 
a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py index c1612ea1cebc5d..62da6f02c3f26f 100644 --- a/Lib/xml/sax/saxutils.py +++ b/Lib/xml/sax/saxutils.py @@ -3,9 +3,12 @@ convenience of application and driver writers. """ -import os, urllib.parse, urllib.request -import io import codecs +import io +import os +import re +import urllib.parse +import urllib.request from . import handler from . import xmlreader @@ -67,6 +70,29 @@ def quoteattr(data, entities={}): data = '"%s"' % data return data +def is_valid_name(name): + """Test whether a string is a valid element or attribute name.""" + # https://www.w3.org/TR/xml/#NT-Name + return re.fullmatch( + # NameStartChar + '[' + ':A-Z_a-z' + '\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF' + '\u200C\u200D' + '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' + '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' + ']' + # NameChar + '[' + r'\-.0-9:A-Z_a-z' + '\xB7' + '\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF' + '\u200C\u200D\u203F\u2040' + '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' + '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' + ']*', + name) is not None + def _gettextwriter(out, encoding): if out is None: diff --git a/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst new file mode 100644 index 00000000000000..53588f184598d5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst @@ -0,0 +1,2 @@ +Add the :func:`~xml.sax.utils.is_valid_name` function, which allows to check +whether a string can be used as an element or attribute name in XML. From 4f37eed421f9353f13719777ee66a2919e8912c2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 8 Oct 2025 16:29:53 +0300 Subject: [PATCH 2/6] Fix references. 
--- Doc/whatsnew/3.15.rst | 6 +++--- .../Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index c907b3515ab015..871fd661b8e3df 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -580,10 +580,10 @@ xml.parsers.expat .. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack -xml.sax.utils -------------- +xml.sax.saxutils +---------------- -* Add the :func:`~xml.sax.utils.is_valid_name` function, which allows to check +* Add the :func:`~xml.sax.saxutils.is_valid_name` function, which allows to check whether a string can be used as an element or attribute name in XML. (Contributed by Serhiy Storchaka in :gh:`139489`.) diff --git a/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst index 53588f184598d5..3f99d7f7f957e0 100644 --- a/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst +++ b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst @@ -1,2 +1,2 @@ -Add the :func:`~xml.sax.utils.is_valid_name` function, which allows to check +Add the :func:`~xml.sax.saxutils.is_valid_name` function, which allows to check whether a string can be used as an element or attribute name in XML. From e814cfa888c30e021493b4a9bac7a1d68a291ec7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 30 Apr 2026 20:19:01 +0300 Subject: [PATCH 3/6] Move to the toplevel xml module. 
--- Doc/library/xml.rst | 2 -- Lib/test/test_sax.py | 4 +-- Lib/test/test_xml.py | 27 +++++++++++++++++++ ...-10-08-15-36-00.gh-issue-139489.W46tvn.rst | 2 +- 4 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 Lib/test/test_xml.py diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index d4e9dfb2e6b891..f9ffaa9a94aacc 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -57,8 +57,6 @@ This module also defines utility functions. ..versionadded:: next - .. versionadded:: 3.2 - .. _xml-security: .. _xml-vulnerabilities: diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 6a22ed90369922..29babd7bf6996a 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -9,8 +9,8 @@ except SAXReaderNotAvailable: # don't try to test this module if we cannot create a parser raise unittest.SkipTest("no XML parsers available") -from xml.sax.saxutils import (XMLGenerator, escape, unescape, quoteattr, - XMLFilterBase, prepare_input_source) +from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ + XMLFilterBase, prepare_input_source from xml.sax.expatreader import create_parser from xml.sax.handler import (feature_namespaces, feature_external_ges, LexicalHandler) diff --git a/Lib/test/test_xml.py b/Lib/test/test_xml.py new file mode 100644 index 00000000000000..fd3633e43982d7 --- /dev/null +++ b/Lib/test/test_xml.py @@ -0,0 +1,27 @@ +import xml +import unittest + + +class TestUtils(unittest.TestCase): + + def test_is_valid_name(self): + is_valid_name = xml.is_valid_name + self.assertFalse(is_valid_name('')) + self.assertTrue(is_valid_name('name')) + self.assertTrue(is_valid_name('NAME')) + self.assertTrue(is_valid_name('name0:-._·')) + self.assertTrue(is_valid_name('_')) + self.assertTrue(is_valid_name(':')) + self.assertTrue(is_valid_name('Ñàḿĕ')) + self.assertTrue(is_valid_name('\U000EFFFF')) + self.assertFalse(is_valid_name('0')) + self.assertFalse(is_valid_name('-')) + self.assertFalse(is_valid_name('.')) + 
self.assertFalse(is_valid_name('·')) + self.assertFalse(is_valid_name('na me')) + for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000': + self.assertFalse(is_valid_name('name' + c)) + + +if __name__ == '__main__': + unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst index 3f99d7f7f957e0..05edefda053a08 100644 --- a/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst +++ b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst @@ -1,2 +1,2 @@ -Add the :func:`~xml.sax.saxutils.is_valid_name` function, which allows to check +Add the :func:`xml.is_valid_name` function, which allows to check whether a string can be used as an element or attribute name in XML. From bc26874c80206e77f5c1d4fbbb1748249768d941 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 30 Apr 2026 20:42:47 +0300 Subject: [PATCH 4/6] Use possessive quantifier for optimization. 
--- Lib/xml/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/xml/utils.py b/Lib/xml/utils.py index 14512d85b41593..5158fc6641caa9 100644 --- a/Lib/xml/utils.py +++ b/Lib/xml/utils.py @@ -21,5 +21,5 @@ def is_valid_name(name): '\u200C\u200D\u203F\u2040' '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' - ']*', + ']*+', name) is not None From 7e964920cc5119d39b2b49e9feb3b4bb6f9fbb5f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 2 May 2026 17:09:11 +0300 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Victor Stinner --- Doc/whatsnew/3.15.rst | 2 +- Lib/xml/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 25a075939a4d59..91b53570f5420f 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1533,7 +1533,7 @@ wave xml --- -* Add the :func:`~xml.sax.saxutils.is_valid_name` function, which allows to check +* Add the :func:`xml.is_valid_name` function, which allows to check whether a string can be used as an element or attribute name in XML. (Contributed by Serhiy Storchaka in :gh:`139489`.) diff --git a/Lib/xml/utils.py b/Lib/xml/utils.py index 5158fc6641caa9..7825fe917bce33 100644 --- a/Lib/xml/utils.py +++ b/Lib/xml/utils.py @@ -1,4 +1,4 @@ -import re +lazy import re def is_valid_name(name): From 3245a65da4acc3796432a7716ee9211ceb386f31 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 4 May 2026 12:22:07 +0300 Subject: [PATCH 6/6] Import re as _re. 
--- Lib/xml/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/xml/utils.py b/Lib/xml/utils.py index 7825fe917bce33..c9a0b260675bed 100644 --- a/Lib/xml/utils.py +++ b/Lib/xml/utils.py @@ -1,10 +1,10 @@ -lazy import re +lazy import re as _re def is_valid_name(name): """Test whether a string is a valid element or attribute name.""" # https://www.w3.org/TR/xml/#NT-Name - return re.fullmatch( + return _re.fullmatch( # NameStartChar '[' ':A-Z_a-z'