tj-python · pull · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
diff --git a/Doc/c-api/float.rst b/Doc/c-api/float.rst
@@ -190,24 +190,23 @@ The pack and unpack functions provide an efficient platform-independent way to
 store floating-point values as byte strings. The Pack routines produce a bytes
 string from a C :c:expr:`double`, and the Unpack routines produce a C
 :c:expr:`double` from such a bytes string. The suffix (2, 4 or 8) specifies the
-number of bytes in the bytes string.
+number of bytes in the bytes string:
 
-On platforms that appear to use IEEE 754 formats these functions work by
-copying bits. On other platforms, the 2-byte format is identical to the IEEE
-754 binary16 half-precision format, the 4-byte format (32-bit) is identical to
-the IEEE 754 binary32 single precision format, and the 8-byte format to the
-IEEE 754 binary64 double precision format, although the packing of INFs and
-NaNs (if such things exist on the platform) isn't handled correctly, and
-attempting to unpack a bytes string containing an IEEE INF or NaN will raise an
-exception.
+* The 2-byte format is the IEEE 754 binary16 half-precision format.
+* The 4-byte format is the IEEE 754 binary32 single-precision format.
+* The 8-byte format is the IEEE 754 binary64 double-precision format.
 
-Note that NaN type may not be preserved on IEEE platforms (signaling NaNs become
-quiet NaNs), for example on x86 systems in 32-bit mode.
+The NaN type may not be preserved on some platforms while unpacking (signaling
+NaNs become quiet NaNs), for example on x86 systems in 32-bit mode.
 
+These functions assume that the :c:expr:`double` type uses the IEEE 754 binary64
+double-precision format.  What happens otherwise is partly accidental (alas).
 On non-IEEE platforms with more precision, or larger dynamic range, than IEEE
 754 supports, not all values can be packed; on non-IEEE platforms with less
-precision, or smaller dynamic range, not all values can be unpacked. What
-happens in such cases is partly accidental (alas).
+precision, or smaller dynamic range, not all values can be unpacked.  The
+packing of special numbers like INFs and NaNs (if such things exist on the
+platform) may not be handled correctly, and attempting to unpack a bytes string
+containing an IEEE INF or NaN may raise an exception.
 
 .. versionadded:: 3.11
 
@@ -217,9 +216,9 @@ Pack functions
 The pack routines write 2, 4 or 8 bytes, starting at *p*. *le* is an
 :c:expr:`int` argument, non-zero if you want the bytes string in little-endian
 format (exponent last, at ``p+1``, ``p+3``, or ``p+6`` and ``p+7``), zero if you
-want big-endian format (exponent first, at *p*). The :c:macro:`PY_BIG_ENDIAN`
-constant can be used to use the native endian: it is equal to ``1`` on big
-endian processor, or ``0`` on little endian processor.
+want big-endian format (exponent first, at *p*). Use the :c:macro:`!PY_LITTLE_ENDIAN`
+constant to select the native endianness: it is equal to ``0`` on a big-endian
+processor, or ``1`` on a little-endian processor.
 
 Return value: ``0`` if all is OK, ``-1`` if error (and an exception is set,
 most likely :exc:`OverflowError`).
@@ -236,21 +235,27 @@ most likely :exc:`OverflowError`).
 
    Pack a C double as the IEEE 754 binary64 double precision format.
 
+   .. impl-detail::
+      This function always succeeds in CPython.
+
 
 Unpack functions
 ^^^^^^^^^^^^^^^^
 
 The unpack routines read 2, 4 or 8 bytes, starting at *p*.  *le* is an
 :c:expr:`int` argument, non-zero if the bytes string is in little-endian format
 (exponent last, at ``p+1``, ``p+3`` or ``p+6`` and ``p+7``), zero if big-endian
-(exponent first, at *p*). The :c:macro:`PY_BIG_ENDIAN` constant can be used to
-use the native endian: it is equal to ``1`` on big endian processor, or ``0``
+(exponent first, at *p*). Use the :c:macro:`!PY_LITTLE_ENDIAN` constant to
+select the native endianness: it is ``0`` on a big-endian processor, or ``1``
 on little endian processor.
 
 Return value: The unpacked double.  On error, this is ``-1.0`` and
 :c:func:`PyErr_Occurred` is true (and an exception is set, most likely
 :exc:`OverflowError`).
 
+.. impl-detail::
+    These functions always succeed in CPython.
+
 .. c:function:: double PyFloat_Unpack2(const char *p, int le)
 
    Unpack the IEEE 754 binary16 half-precision format as a C double.

diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore
@@ -3,7 +3,6 @@
 # Keep lines sorted lexicographically to help avoid merge conflicts.
 
 Doc/c-api/descriptor.rst
-Doc/c-api/float.rst
 Doc/c-api/init_config.rst
 Doc/c-api/intro.rst
 Doc/c-api/stable.rst

diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
@@ -1358,7 +1358,7 @@ The JIT avoids :term:`reference count`\ s where possible. This generally
 reduces the cost of most operations in Python.
 
 (Contributed by Ken Jin, Donghee Na, Zheao Li, Hai Zhu, Savannah Ostrowski,
-Reiden Ong, Noam Cohen, Tomas Roun, PuQing, and Cajetan Rodrigues in :gh:`134584`.)
+Reiden Ong, Noam Cohen, Tomas Roun, PuQing, Cajetan Rodrigues, and Sacul in :gh:`134584`.)
 
 .. rubric:: Better machine code generation
 

diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
@@ -2832,6 +2832,18 @@ def testfunc(n):
         self.assertIn("_GUARD_TYPE_VERSION", uops)
         self.assertNotIn("_CHECK_ATTR_CLASS", uops)
 
+    def test_load_common_constant(self):
+        def testfunc(n):
+            for _ in range(n):
+                x = list(i for i in ())
+            return x
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, list(()))
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_BUILD_LIST", uops)
+        self.assertNotIn("_LOAD_COMMON_CONSTANT", uops)
+
     def test_load_small_int(self):
         def testfunc(n):
             x = 0

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
@@ -1765,13 +1765,15 @@ def test_basic(self):
         writer.write_utf8(b'var', -1)
 
         # test PyUnicodeWriter_WriteChar()
-        writer.write_char('=')
+        writer.write_char(ord('='))
 
         # test PyUnicodeWriter_WriteSubstring()
         writer.write_substring("[long]", 1, 5)
+        # CRASHES writer.write_substring(NULL, 0, 0)
 
         # test PyUnicodeWriter_WriteStr()
         writer.write_str(" value ")
+        # CRASHES writer.write_str(NULL)
 
         # test PyUnicodeWriter_WriteRepr()
         writer.write_repr("repr")
@@ -1786,21 +1788,38 @@ def test_repr_null(self):
         self.assertEqual(writer.finish(),
                          "var=<NULL>")
 
+    def test_write_char(self):
+        writer = self.create_writer(0)
+        writer.write_char(0)
+        writer.write_char(ord('$'))
+        writer.write_char(0x20ac)
+        writer.write_char(0x10_ffff)
+        self.assertRaises(ValueError, writer.write_char, 0x11_0000)
+        self.assertRaises(ValueError, writer.write_char, 0xFFFF_FFFF)
+        self.assertEqual(writer.finish(),
+                         "\0$\u20AC\U0010FFFF")
+
     def test_utf8(self):
         writer = self.create_writer(0)
         writer.write_utf8(b"ascii", -1)
-        writer.write_char('-')
+        writer.write_char(ord('-'))
         writer.write_utf8(b"latin1=\xC3\xA9", -1)
-        writer.write_char('-')
+        writer.write_char(ord('-'))
         writer.write_utf8(b"euro=\xE2\x82\xAC", -1)
-        writer.write_char('.')
+        writer.write_char(ord('.'))
+        writer.write_utf8(NULL, 0)
+        # CRASHES writer.write_utf8(NULL, 1)
+        # CRASHES writer.write_utf8(NULL, -1)
         self.assertEqual(writer.finish(),
                          "ascii-latin1=\xE9-euro=\u20AC.")
 
     def test_ascii(self):
         writer = self.create_writer(0)
         writer.write_ascii(b"Hello ", -1)
         writer.write_ascii(b"", 0)
+        writer.write_ascii(NULL, 0)
+        # CRASHES writer.write_ascii(NULL, 1)
+        # CRASHES writer.write_ascii(NULL, -1)
         writer.write_ascii(b"Python! <truncated>", 6)
         self.assertEqual(writer.finish(), "Hello Python")
 
@@ -1817,6 +1836,9 @@ def test_recover_utf8_error(self):
         # write fails with an invalid string
         with self.assertRaises(UnicodeDecodeError):
             writer.write_utf8(b"invalid\xFF", -1)
+        with self.assertRaises(UnicodeDecodeError):
+            s = "truncated\u20AC".encode()
+            writer.write_utf8(s, len(s) - 1)
 
         # retry write with a valid string
         writer.write_utf8(b"valid", -1)
@@ -1828,13 +1850,19 @@ def test_decode_utf8(self):
         # test PyUnicodeWriter_DecodeUTF8Stateful()
         writer = self.create_writer(0)
         writer.decodeutf8stateful(b"ign\xFFore", -1, b"ignore")
-        writer.write_char('-')
+        writer.write_char(ord('-'))
         writer.decodeutf8stateful(b"replace\xFF", -1, b"replace")
-        writer.write_char('-')
+        writer.write_char(ord('-'))
 
         # incomplete trailing UTF-8 sequence
         writer.decodeutf8stateful(b"incomplete\xC3", -1, b"replace")
 
+        writer.decodeutf8stateful(NULL, 0, b"replace")
+        # CRASHES writer.decodeutf8stateful(NULL, 1, b"replace")
+        # CRASHES writer.decodeutf8stateful(NULL, -1, b"replace")
+        with self.assertRaises(UnicodeDecodeError):
+            writer.decodeutf8stateful(b"default\xFF", -1, NULL)
+
         self.assertEqual(writer.finish(),
                          "ignore-replace\uFFFD-incomplete\uFFFD")
 
@@ -1845,12 +1873,12 @@ def test_decode_utf8_consumed(self):
         # valid string
         consumed = writer.decodeutf8stateful(b"text", -1, b"strict", True)
         self.assertEqual(consumed, 4)
-        writer.write_char('-')
+        writer.write_char(ord('-'))
 
         # non-ASCII
         consumed = writer.decodeutf8stateful(b"\xC3\xA9-\xE2\x82\xAC", 6, b"strict", True)
         self.assertEqual(consumed, 6)
-        writer.write_char('-')
+        writer.write_char(ord('-'))
 
         # invalid UTF-8 (consumed is 0 on error)
         with self.assertRaises(UnicodeDecodeError):
@@ -1859,54 +1887,92 @@ def test_decode_utf8_consumed(self):
         # ignore error handler
         consumed = writer.decodeutf8stateful(b"more\xFF", -1, b"ignore", True)
         self.assertEqual(consumed, 5)
-        writer.write_char('-')
+        writer.write_char(ord('-'))
 
         # incomplete trailing UTF-8 sequence
         consumed = writer.decodeutf8stateful(b"incomplete\xC3", -1, b"ignore", True)
         self.assertEqual(consumed, 10)
+        writer.write_char(ord('-'))
 
-        self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete")
+        consumed = writer.decodeutf8stateful(NULL, 0, b"replace", True)
+        self.assertEqual(consumed, 0)
+        # CRASHES writer.decodeutf8stateful(NULL, 1, b"replace", True)
+        # CRASHES writer.decodeutf8stateful(NULL, -1, b"replace", True)
+        consumed = writer.decodeutf8stateful(b"default\xC3", -1, NULL, True)
+        self.assertEqual(consumed, 7)
+
+        self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete-default")
 
     def test_widechar(self):
+        from _testcapi import SIZEOF_WCHAR_T
+
+        if SIZEOF_WCHAR_T == 2:
+            encoding = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
+        elif SIZEOF_WCHAR_T == 4:
+            encoding = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
+
         writer = self.create_writer(0)
-        writer.write_widechar("latin1=\xE9")
-        writer.write_widechar("-")
-        writer.write_widechar("euro=\u20AC")
-        writer.write_char("-")
-        writer.write_widechar("max=\U0010ffff")
-        writer.write_char('.')
+        writer.write_widechar("latin1=\xE9".encode(encoding))
+        writer.write_char(ord("-"))
+        writer.write_widechar("euro=\u20AC".encode(encoding))
+        writer.write_char(ord("-"))
+        writer.write_widechar("max=\U0010ffff".encode(encoding))
+        writer.write_char(ord("-"))
+        writer.write_widechar("zeroes=".encode(encoding).ljust(SIZEOF_WCHAR_T * 10, b'\0'),
+                              10)
+        writer.write_char(ord('.'))
+
+        if SIZEOF_WCHAR_T == 4:
+            invalid = (b'\x00\x00\x11\x00' if sys.byteorder == 'little' else
+                       b'\x00\x11\x00\x00')
+            with self.assertRaises(ValueError):
+                writer.write_widechar("invalid=".encode(encoding) + invalid)
+        writer.write_widechar(b'', -5)
+        writer.write_widechar(NULL, 0)
+        # CRASHES writer.write_widechar(NULL, 1)
+        # CRASHES writer.write_widechar(NULL, -1)
+
         self.assertEqual(writer.finish(),
-                         "latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
+                         "latin1=\xE9-euro=\u20AC-max=\U0010ffff-zeroes=\0\0\0.")
 
     def test_ucs4(self):
+        encoding = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
+
         writer = self.create_writer(0)
-        writer.write_ucs4("ascii IGNORED", 5)
-        writer.write_char("-")
-        writer.write_ucs4("latin1=\xe9", 8)
-        writer.write_char("-")
-        writer.write_ucs4("euro=\u20ac", 6)
-        writer.write_char("-")
-        writer.write_ucs4("max=\U0010ffff", 5)
-        writer.write_char(".")
+        writer.write_ucs4("ascii IGNORED".encode(encoding), 5)
+        writer.write_char(ord("-"))
+        writer.write_ucs4("latin1=\xe9".encode(encoding))
+        writer.write_char(ord("-"))
+        writer.write_ucs4("euro=\u20ac".encode(encoding))
+        writer.write_char(ord("-"))
+        writer.write_ucs4("max=\U0010ffff".encode(encoding))
+        writer.write_char(ord("."))
         self.assertEqual(writer.finish(),
                          "ascii-latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
 
         # Test some special characters
         writer = self.create_writer(0)
         # Lone surrogate character
-        writer.write_ucs4("lone\uDC80", 5)
-        writer.write_char("-")
+        writer.write_ucs4("lone\uDC80".encode(encoding, 'surrogatepass'))
+        writer.write_char(ord("-"))
         # Surrogate pair
-        writer.write_ucs4("pair\uDBFF\uDFFF", 5)
-        writer.write_char("-")
-        writer.write_ucs4("null[\0]", 7)
+        writer.write_ucs4("pair\uD83D\uDC0D".encode(encoding, 'surrogatepass'))
+        writer.write_char(ord("-"))
+        writer.write_ucs4("null[\0]".encode(encoding), 7)
+        invalid = (b'\x00\x00\x11\x00' if sys.byteorder == 'little' else
+                   b'\x00\x11\x00\x00')
+        # CRASHES writer.write_ucs4("invalid".encode(encoding) + invalid)
+        writer.write_ucs4(NULL, 0)
+        # CRASHES writer.write_ucs4(NULL, 1)
         self.assertEqual(writer.finish(),
-                         "lone\udc80-pair\udbff-null[\0]")
+                         "lone\udc80-pair\ud83d\udc0d-null[\x00]")
 
         # invalid size
         writer = self.create_writer(0)
         with self.assertRaises(ValueError):
-            writer.write_ucs4("text", -1)
+            writer.write_ucs4("text".encode(encoding), -1)
+        self.assertRaises(ValueError, writer.write_ucs4, b'', -1)
+        self.assertRaises(ValueError, writer.write_ucs4, NULL, -1)
 
     def test_substring_empty(self):
         writer = self.create_writer(0)
@@ -1932,7 +1998,7 @@ def test_format(self):
         from ctypes import c_int
         writer = self.create_writer(0)
         self.writer_format(writer, b'%s %i', b'abc', c_int(123))
-        writer.write_char('.')
+        writer.write_char(ord('.'))
         self.assertEqual(writer.finish(), 'abc 123.')
 
     def test_recover_error(self):