python · omkar-334 · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026 · ZeroIntensity
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
@@ -53,6 +53,30 @@ struct _Py_global_strings {
         STRUCT_FOR_STR(list_err, "list index out of range")
         STRUCT_FOR_STR(native, "<native>")
         STRUCT_FOR_STR(str_replace_inf, "1e309")
+        STRUCT_FOR_STR(token_amperequal, "&=")
+        STRUCT_FOR_STR(token_atequal, "@=")
+        STRUCT_FOR_STR(token_circumflexequal, "^=")
+        STRUCT_FOR_STR(token_colonequal, ":=")
+        STRUCT_FOR_STR(token_double_slash, "//")
+        STRUCT_FOR_STR(token_double_slashequal, "//=")
+        STRUCT_FOR_STR(token_doublestar, "**")
+        STRUCT_FOR_STR(token_doublestarequal, "**=")
+        STRUCT_FOR_STR(token_ellipsis, "...")
+        STRUCT_FOR_STR(token_eqequal, "==")
+        STRUCT_FOR_STR(token_greaterequal, ">=")
+        STRUCT_FOR_STR(token_leftshift, "<<")
+        STRUCT_FOR_STR(token_leftshiftequal, "<<=")
+        STRUCT_FOR_STR(token_lessequal, "<=")
+        STRUCT_FOR_STR(token_minequal, "-=")
+        STRUCT_FOR_STR(token_notequal, "!=")
+        STRUCT_FOR_STR(token_percentequal, "%=")
+        STRUCT_FOR_STR(token_plusequal, "+=")
+        STRUCT_FOR_STR(token_rarrow, "->")
+        STRUCT_FOR_STR(token_rightshift, ">>")
+        STRUCT_FOR_STR(token_rightshiftequal, ">>=")
+        STRUCT_FOR_STR(token_slashequal, "/=")
+        STRUCT_FOR_STR(token_starequal, "*=")
+        STRUCT_FOR_STR(token_vbarequal, "|=")
         STRUCT_FOR_STR(type_params, ".type_params")
         STRUCT_FOR_STR(utf_8, "utf-8")
     } literals;

diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
@@ -1885,6 +1885,45 @@ def test_exact_type(self):
                                   token.NAME, token.AMPER, token.NUMBER,
                                   token.RPAR)
 
+    def test_exact_operator_token_strings_are_reused(self):
+        operators = (
+            '...', '->', '!=', '%=', '&=', '**', '**=', '*=',
+            '+=', '-=', '//', '//=', '/=', ':=', '<<', '<<=',
+            '<=', '==', '>=', '>>', '>>=', '@=', '^=', '|=',
+        )
+        for op in operators:
+            with self.subTest(op=op):
+                source = f'{op} {op}\n'.encode()
+                tokens = list(tokenize.tokenize(BytesIO(source).readline))
+                matches = [tok.string for tok in tokens if tok.string == op]
+                self.assertEqual(len(matches), 2)
+                self.assertIs(matches[0], matches[1])
+
+    def test_exact_operator_token_strings_are_reused_by_c_tokenizer(self):
+        operators = (
+            '...', '->', '!=', '%=', '&=', '**', '**=', '*=',
+            '+=', '-=', '//', '//=', '/=', ':=', '<<', '<<=',
+            '<=', '==', '>=', '>>', '>>=', '@=', '^=', '|=',
+        )
+        for op in operators:
+            with self.subTest(op=op):
+                source = BytesIO(f'{op} {op}\n'.encode())
+                tokens = list(tokenize._tokenize.TokenizerIter(
+                    source.readline, encoding='utf-8', extra_tokens=True))
+                matches = [tok[1] for tok in tokens if tok[1] == op]
+                self.assertEqual(len(matches), 2)
+                self.assertIs(matches[0], matches[1])
+
+    def test_old_not_equal_spelling_is_not_rewritten(self):
+        source = BytesIO(
+            b'from __future__ import barry_as_FLUFL\n'
+            b'a <> b\n'
+        )
+        tokens = list(tokenize._tokenize.TokenizerIter(
+            source.readline, encoding='utf-8', extra_tokens=True))
+        self.assertIn('<>', [tok[1] for tok in tokens])
+        self.assertNotIn('!=', [tok[1] for tok in tokens])
+
     def test_pathological_trailing_whitespace(self):
         # See http://bugs.python.org/issue16152
         self.assertExactTypeEqual('@          ', token.AT)

diff --git a/Misc/NEWS.d/next/Library/2026-06-21-00-00-00.gh-issue-136757.9VxToken.rst b/Misc/NEWS.d/next/Library/2026-06-21-00-00-00.gh-issue-136757.9VxToken.rst
@@ -0,0 +1,2 @@
+Reduce duplicate string allocations in :mod:`tokenize` by reusing static
+strings for exact multi-character operator tokens.
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
@@ -1,12 +1,38 @@
 #include "Python.h"
 #include "errcode.h"
 #include "internal/pycore_critical_section.h"   // Py_BEGIN_CRITICAL_SECTION
+#include "internal/pycore_global_strings.h"     // _Py_DECLARE_STR()
 #include "internal/pycore_tuple.h"              // _PyTuple_FromPair
 #include "../Parser/lexer/state.h"
 #include "../Parser/lexer/lexer.h"
 #include "../Parser/tokenizer/tokenizer.h"
 #include "../Parser/pegen.h"                    // _PyPegen_byte_offset_to_character_offset()
 
+/* String literals for the multi-character operator tokens returned by
+   get_static_exact_token_str(); kept in alphabetical order by name. */
+_Py_DECLARE_STR(token_amperequal, "&=")
+_Py_DECLARE_STR(token_atequal, "@=")
+_Py_DECLARE_STR(token_circumflexequal, "^=")
+_Py_DECLARE_STR(token_colonequal, ":=")
+_Py_DECLARE_STR(token_double_slash, "//")
+_Py_DECLARE_STR(token_double_slashequal, "//=")
+_Py_DECLARE_STR(token_doublestar, "**")
+_Py_DECLARE_STR(token_doublestarequal, "**=")
+_Py_DECLARE_STR(token_ellipsis, "...")
+_Py_DECLARE_STR(token_eqequal, "==")
+_Py_DECLARE_STR(token_greaterequal, ">=")
+_Py_DECLARE_STR(token_leftshift, "<<")
+_Py_DECLARE_STR(token_leftshiftequal, "<<=")
+_Py_DECLARE_STR(token_lessequal, "<=")
+_Py_DECLARE_STR(token_minequal, "-=")
+_Py_DECLARE_STR(token_notequal, "!=")
+_Py_DECLARE_STR(token_percentequal, "%=")
+_Py_DECLARE_STR(token_plusequal, "+=")
+_Py_DECLARE_STR(token_rarrow, "->")
+_Py_DECLARE_STR(token_rightshift, ">>")
+_Py_DECLARE_STR(token_rightshiftequal, ">>=")
+_Py_DECLARE_STR(token_slashequal, "/=")
+_Py_DECLARE_STR(token_starequal, "*=")
+_Py_DECLARE_STR(token_vbarequal, "|=")
+
 static struct PyModuleDef _tokenizemodule;
 
 typedef struct {
@@ -42,6 +68,49 @@ typedef struct
     Py_ssize_t byte_col_offset_diff;
 } tokenizeriterobject;
 
+/* Look up the preallocated string object for an operator token.
+
+   `type` is the token type; `start` and `len` delimit the token's bytes.
+   Returns a new reference to the matching static string, or NULL when
+   `type` has no static string or the bytes differ from that type's
+   canonical spelling.  NULL is not an error here (nothing in this function
+   sets an exception); the caller builds the string itself in that case. */
+static PyObject *
+get_static_exact_token_str(int type, const char *start, Py_ssize_t len)
+{
+    /* The bytes are compared in addition to the type, so a token whose text
+       is not LITERAL is never answered with LITERAL's string. */
+#define STATIC_TOKEN_CASE(TYPE, NAME, LITERAL) \
+    case TYPE: \
+        if (len != (Py_ssize_t)(sizeof(LITERAL) - 1) \
+            || memcmp(start, LITERAL, sizeof(LITERAL) - 1) != 0) \
+        { \
+            return NULL; \
+        } \
+        return Py_NewRef(&_Py_STR(NAME))
+
+    switch (type) {
+        STATIC_TOKEN_CASE(AMPEREQUAL, token_amperequal, "&=");
+        STATIC_TOKEN_CASE(ATEQUAL, token_atequal, "@=");
+        STATIC_TOKEN_CASE(CIRCUMFLEXEQUAL, token_circumflexequal, "^=");
+        STATIC_TOKEN_CASE(COLONEQUAL, token_colonequal, ":=");
+        STATIC_TOKEN_CASE(DOUBLESLASH, token_double_slash, "//");
+        STATIC_TOKEN_CASE(DOUBLESLASHEQUAL, token_double_slashequal, "//=");
+        STATIC_TOKEN_CASE(DOUBLESTAR, token_doublestar, "**");
+        STATIC_TOKEN_CASE(DOUBLESTAREQUAL, token_doublestarequal, "**=");
+        STATIC_TOKEN_CASE(ELLIPSIS, token_ellipsis, "...");
+        STATIC_TOKEN_CASE(EQEQUAL, token_eqequal, "==");
+        STATIC_TOKEN_CASE(GREATEREQUAL, token_greaterequal, ">=");
+        STATIC_TOKEN_CASE(LEFTSHIFT, token_leftshift, "<<");
+        STATIC_TOKEN_CASE(LEFTSHIFTEQUAL, token_leftshiftequal, "<<=");
+        STATIC_TOKEN_CASE(LESSEQUAL, token_lessequal, "<=");
+        STATIC_TOKEN_CASE(MINEQUAL, token_minequal, "-=");
+        STATIC_TOKEN_CASE(NOTEQUAL, token_notequal, "!=");
+        STATIC_TOKEN_CASE(PERCENTEQUAL, token_percentequal, "%=");
+        STATIC_TOKEN_CASE(PLUSEQUAL, token_plusequal, "+=");
+        STATIC_TOKEN_CASE(RARROW, token_rarrow, "->");
+        STATIC_TOKEN_CASE(RIGHTSHIFT, token_rightshift, ">>");
+        STATIC_TOKEN_CASE(RIGHTSHIFTEQUAL, token_rightshiftequal, ">>=");
+        STATIC_TOKEN_CASE(SLASHEQUAL, token_slashequal, "/=");
+        STATIC_TOKEN_CASE(STAREQUAL, token_starequal, "*=");
+        STATIC_TOKEN_CASE(VBAREQUAL, token_vbarequal, "|=");
+        default:
+            break;
+    }
+
+#undef STATIC_TOKEN_CASE
+    return NULL;
+}
+
 /*[clinic input]
 @classmethod
 _tokenizer.tokenizeriter.__new__ as tokenizeriter_new
@@ -268,7 +337,11 @@ tokenizeriter_next(PyObject *op)
         str = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
     }
     else {
-        str = PyUnicode_FromStringAndSize(token.start, token.end - token.start);
+        Py_ssize_t len = token.end - token.start;
+        str = get_static_exact_token_str(type, token.start, len);
+        if (str == NULL) {
+            str = PyUnicode_FromStringAndSize(token.start, len);
+        }
     }
     if (str == NULL) {
         goto exit;
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Reduce duplicate string allocations in :mod:`tokenize` by reusing static
		strings for exact multi-character operator tokens.