|
1 | 1 | #include "Python.h" |
2 | 2 | #include "errcode.h" |
3 | 3 | #include "internal/pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION |
| 4 | +#include "internal/pycore_global_strings.h" // _Py_DECLARE_STR() |
4 | 5 | #include "internal/pycore_tuple.h" // _PyTuple_FromPair |
5 | 6 | #include "../Parser/lexer/state.h" |
6 | 7 | #include "../Parser/lexer/lexer.h" |
7 | 8 | #include "../Parser/tokenizer/tokenizer.h" |
8 | 9 | #include "../Parser/pegen.h" // _PyPegen_byte_offset_to_character_offset() |
9 | 10 |
|
/* Global string declarations for multi-character operator tokens.
 * Each entry pairs an identifier with the exact token spelling; the same
 * identifiers are looked up with _Py_STR() in get_static_exact_token_str().
 * NOTE(review): _Py_DECLARE_STR() comes from pycore_global_strings.h; how the
 * declared objects are materialized is not visible in this file. */
| 11 | +_Py_DECLARE_STR(token_atequal, "@=")
| 12 | +_Py_DECLARE_STR(token_circumflexequal, "^=")
| 13 | +_Py_DECLARE_STR(token_colonequal, ":=")
| 14 | +_Py_DECLARE_STR(token_double_slash, "//")
| 15 | +_Py_DECLARE_STR(token_double_slashequal, "//=")
| 16 | +_Py_DECLARE_STR(token_doublestar, "**")
| 17 | +_Py_DECLARE_STR(token_doublestarequal, "**=")
| 18 | +_Py_DECLARE_STR(token_ellipsis, "...")
| 19 | +_Py_DECLARE_STR(token_eqequal, "==")
| 20 | +_Py_DECLARE_STR(token_greaterequal, ">=")
| 21 | +_Py_DECLARE_STR(token_leftshift, "<<")
| 22 | +_Py_DECLARE_STR(token_leftshiftequal, "<<=")
| 23 | +_Py_DECLARE_STR(token_lessequal, "<=")
| 24 | +_Py_DECLARE_STR(token_minequal, "-=")
| 25 | +_Py_DECLARE_STR(token_notequal, "!=")
| 26 | +_Py_DECLARE_STR(token_percentequal, "%=")
| 27 | +_Py_DECLARE_STR(token_plusequal, "+=")
| 28 | +_Py_DECLARE_STR(token_rarrow, "->")
| 29 | +_Py_DECLARE_STR(token_rightshift, ">>")
| 30 | +_Py_DECLARE_STR(token_rightshiftequal, ">>=")
| 31 | +_Py_DECLARE_STR(token_slashequal, "/=")
| 32 | +_Py_DECLARE_STR(token_starequal, "*=")
| 33 | +_Py_DECLARE_STR(token_vbarequal, "|=")
| 34 | +_Py_DECLARE_STR(token_amperequal, "&=")
| 35 | + |
/* File-local declaration of the module definition object.
 * NOTE(review): presumably a forward declaration whose initializer appears
 * further down the file -- not visible in this excerpt. */
10 | 36 | static struct PyModuleDef _tokenizemodule;
11 | 37 |
|
12 | 38 | typedef struct { |
@@ -42,6 +68,49 @@ typedef struct |
42 | 68 | Py_ssize_t byte_col_offset_diff; |
43 | 69 | } tokenizeriterobject; |
44 | 70 |
|
| 71 | +static PyObject * |
| 72 | +get_static_exact_token_str(int type, const char *start, Py_ssize_t len) |
| 73 | +{ |
| 74 | +#define RETURN_STATIC_TOKEN_STR(TYPE, NAME, LITERAL) \ |
| 75 | + case TYPE: \ |
| 76 | + if (len == (Py_ssize_t)(sizeof(LITERAL) - 1) \ |
| 77 | + && memcmp(start, LITERAL, sizeof(LITERAL) - 1) == 0) \ |
| 78 | + { \ |
| 79 | + return Py_NewRef(&_Py_STR(NAME)); \ |
| 80 | + } \ |
| 81 | + break |
| 82 | + |
| 83 | + switch (type) { |
| 84 | + RETURN_STATIC_TOKEN_STR(ATEQUAL, token_atequal, "@="); |
| 85 | + RETURN_STATIC_TOKEN_STR(CIRCUMFLEXEQUAL, token_circumflexequal, "^="); |
| 86 | + RETURN_STATIC_TOKEN_STR(COLONEQUAL, token_colonequal, ":="); |
| 87 | + RETURN_STATIC_TOKEN_STR(DOUBLESLASH, token_double_slash, "//"); |
| 88 | + RETURN_STATIC_TOKEN_STR(DOUBLESLASHEQUAL, token_double_slashequal, "//="); |
| 89 | + RETURN_STATIC_TOKEN_STR(DOUBLESTAR, token_doublestar, "**"); |
| 90 | + RETURN_STATIC_TOKEN_STR(DOUBLESTAREQUAL, token_doublestarequal, "**="); |
| 91 | + RETURN_STATIC_TOKEN_STR(ELLIPSIS, token_ellipsis, "..."); |
| 92 | + RETURN_STATIC_TOKEN_STR(EQEQUAL, token_eqequal, "=="); |
| 93 | + RETURN_STATIC_TOKEN_STR(GREATEREQUAL, token_greaterequal, ">="); |
| 94 | + RETURN_STATIC_TOKEN_STR(LEFTSHIFT, token_leftshift, "<<"); |
| 95 | + RETURN_STATIC_TOKEN_STR(LEFTSHIFTEQUAL, token_leftshiftequal, "<<="); |
| 96 | + RETURN_STATIC_TOKEN_STR(LESSEQUAL, token_lessequal, "<="); |
| 97 | + RETURN_STATIC_TOKEN_STR(MINEQUAL, token_minequal, "-="); |
| 98 | + RETURN_STATIC_TOKEN_STR(NOTEQUAL, token_notequal, "!="); |
| 99 | + RETURN_STATIC_TOKEN_STR(PERCENTEQUAL, token_percentequal, "%="); |
| 100 | + RETURN_STATIC_TOKEN_STR(PLUSEQUAL, token_plusequal, "+="); |
| 101 | + RETURN_STATIC_TOKEN_STR(RARROW, token_rarrow, "->"); |
| 102 | + RETURN_STATIC_TOKEN_STR(RIGHTSHIFT, token_rightshift, ">>"); |
| 103 | + RETURN_STATIC_TOKEN_STR(RIGHTSHIFTEQUAL, token_rightshiftequal, ">>="); |
| 104 | + RETURN_STATIC_TOKEN_STR(SLASHEQUAL, token_slashequal, "/="); |
| 105 | + RETURN_STATIC_TOKEN_STR(STAREQUAL, token_starequal, "*="); |
| 106 | + RETURN_STATIC_TOKEN_STR(VBAREQUAL, token_vbarequal, "|="); |
| 107 | + RETURN_STATIC_TOKEN_STR(AMPEREQUAL, token_amperequal, "&="); |
| 108 | + } |
| 109 | + |
| 110 | +#undef RETURN_STATIC_TOKEN_STR |
| 111 | + return NULL; |
| 112 | +} |
| 113 | + |
45 | 114 | /*[clinic input] |
46 | 115 | @classmethod |
47 | 116 | _tokenizer.tokenizeriter.__new__ as tokenizeriter_new |
@@ -268,7 +337,11 @@ tokenizeriter_next(PyObject *op) |
268 | 337 | str = Py_GetConstant(Py_CONSTANT_EMPTY_STR); |
269 | 338 | } |
270 | 339 | else { |
271 | | - str = PyUnicode_FromStringAndSize(token.start, token.end - token.start); |
| 340 | + Py_ssize_t len = token.end - token.start; |
| 341 | + str = get_static_exact_token_str(type, token.start, len); |
| 342 | + if (str == NULL) { |
| 343 | + str = PyUnicode_FromStringAndSize(token.start, len); |
| 344 | + } |
272 | 345 | } |
273 | 346 | if (str == NULL) { |
274 | 347 | goto exit; |
|
0 commit comments